From 4464bf26d93bdcc80e0b1330fe9c9caef26b34fc Mon Sep 17 00:00:00 2001 From: V3n3RiX Date: Sat, 9 Mar 2024 01:07:53 +0000 Subject: gentoo auto-resync : 09:03:2024 - 01:07:53 --- sci-libs/Manifest.gz | Bin 44776 -> 45292 bytes sci-libs/caffe2/Manifest | 3 + sci-libs/caffe2/caffe2-2.2.1.ebuild | 230 +++++++++++++++++++++ sci-libs/caffe2/files/caffe2-2.2.1-gentoo.patch | 195 +++++++++++++++++ sci-libs/composable-kernel/Manifest | 5 + .../composable-kernel-5.7.1-r1.ebuild | 83 ++++++++ ...posable-kernel-5.7.1-enable-test-examples.patch | 19 ++ sci-libs/composable-kernel/metadata.xml | 15 ++ sci-libs/hipCUB/Manifest | 2 +- sci-libs/hipCUB/hipCUB-5.7.1-r1.ebuild | 55 +++++ sci-libs/hipCUB/hipCUB-5.7.1.ebuild | 71 ------- sci-libs/hipFFT/Manifest | 2 +- sci-libs/hipFFT/hipFFT-5.7.1-r1.ebuild | 4 +- sci-libs/hipSOLVER/Manifest | 3 + sci-libs/hipSOLVER/hipSOLVER-5.7.1.ebuild | 35 ++++ sci-libs/hipSOLVER/metadata.xml | 19 ++ sci-libs/miopen/Manifest | 6 +- .../files/miopen-5.7.1-fix-miopendriver-gemm.patch | 74 +++++++ sci-libs/miopen/metadata.xml | 3 + sci-libs/miopen/miopen-5.7.1-r1.ebuild | 122 +++++++++++ sci-libs/pytorch/Manifest | 5 +- sci-libs/pytorch/files/pytorch-2.2.1-emptyso.patch | 13 ++ sci-libs/pytorch/pytorch-2.1.2-r1.ebuild | 4 +- sci-libs/pytorch/pytorch-2.2.1.ebuild | 66 ++++++ sci-libs/rocBLAS/Manifest | 2 +- sci-libs/rocBLAS/rocBLAS-5.7.1-r1.ebuild | 107 ---------- sci-libs/rocBLAS/rocBLAS-5.7.1-r2.ebuild | 111 ++++++++++ sci-libs/rocFFT/Manifest | 3 +- .../files/rocFFT-5.7.1-fix-rocm-link-path.patch | 14 ++ sci-libs/rocFFT/rocFFT-5.7.1-r1.ebuild | 148 ------------- sci-libs/rocFFT/rocFFT-5.7.1-r2.ebuild | 160 ++++++++++++++ sci-libs/rocSPARSE/Manifest | 2 +- sci-libs/rocSPARSE/rocSPARSE-5.7.1-r1.ebuild | 139 ------------- sci-libs/rocSPARSE/rocSPARSE-5.7.1-r2.ebuild | 141 +++++++++++++ sci-libs/rocThrust/Manifest | 2 +- sci-libs/rocThrust/rocThrust-5.7.1.ebuild | 2 +- sci-libs/rocWMMA/Manifest | 4 + .../rocWMMA-5.7.1-use-system-googletest.patch | 25 +++ 
sci-libs/rocWMMA/metadata.xml | 15 ++ sci-libs/rocWMMA/rocWMMA-5.7.1.ebuild | 56 +++++ 40 files changed, 1486 insertions(+), 479 deletions(-) create mode 100644 sci-libs/caffe2/caffe2-2.2.1.ebuild create mode 100644 sci-libs/caffe2/files/caffe2-2.2.1-gentoo.patch create mode 100644 sci-libs/composable-kernel/Manifest create mode 100644 sci-libs/composable-kernel/composable-kernel-5.7.1-r1.ebuild create mode 100644 sci-libs/composable-kernel/files/composable-kernel-5.7.1-enable-test-examples.patch create mode 100644 sci-libs/composable-kernel/metadata.xml create mode 100644 sci-libs/hipCUB/hipCUB-5.7.1-r1.ebuild delete mode 100644 sci-libs/hipCUB/hipCUB-5.7.1.ebuild create mode 100644 sci-libs/hipSOLVER/Manifest create mode 100644 sci-libs/hipSOLVER/hipSOLVER-5.7.1.ebuild create mode 100644 sci-libs/hipSOLVER/metadata.xml create mode 100644 sci-libs/miopen/files/miopen-5.7.1-fix-miopendriver-gemm.patch create mode 100644 sci-libs/miopen/miopen-5.7.1-r1.ebuild create mode 100644 sci-libs/pytorch/files/pytorch-2.2.1-emptyso.patch create mode 100644 sci-libs/pytorch/pytorch-2.2.1.ebuild delete mode 100644 sci-libs/rocBLAS/rocBLAS-5.7.1-r1.ebuild create mode 100644 sci-libs/rocBLAS/rocBLAS-5.7.1-r2.ebuild create mode 100644 sci-libs/rocFFT/files/rocFFT-5.7.1-fix-rocm-link-path.patch delete mode 100644 sci-libs/rocFFT/rocFFT-5.7.1-r1.ebuild create mode 100644 sci-libs/rocFFT/rocFFT-5.7.1-r2.ebuild delete mode 100644 sci-libs/rocSPARSE/rocSPARSE-5.7.1-r1.ebuild create mode 100644 sci-libs/rocSPARSE/rocSPARSE-5.7.1-r2.ebuild create mode 100644 sci-libs/rocWMMA/Manifest create mode 100644 sci-libs/rocWMMA/files/rocWMMA-5.7.1-use-system-googletest.patch create mode 100644 sci-libs/rocWMMA/metadata.xml create mode 100644 sci-libs/rocWMMA/rocWMMA-5.7.1.ebuild (limited to 'sci-libs') diff --git a/sci-libs/Manifest.gz b/sci-libs/Manifest.gz index 7617fa6c895e..f8d1b3481441 100644 Binary files a/sci-libs/Manifest.gz and b/sci-libs/Manifest.gz differ diff --git 
a/sci-libs/caffe2/Manifest b/sci-libs/caffe2/Manifest index 8f5e0126911f..2bf0d73c3312 100644 --- a/sci-libs/caffe2/Manifest +++ b/sci-libs/caffe2/Manifest @@ -11,10 +11,13 @@ AUX caffe2-2.1.1-cudaExtra.patch 1336 BLAKE2B f12d29a624f9345452227076108b9ded4c AUX caffe2-2.1.1-gentoo.patch 6329 BLAKE2B c002f1605888ac79bed292cf1967eedd4d177193947dbafea8443958e498b371865a8f7548cc7e2c572d7fff69418344286071d64f4742691e943e81b73d811c SHA512 8eb2b2f89de809ff93e0deaa4bd6116f73bed55642904e689ba97e5bf21ba5754935a8d0c1f632b1f262400f0c809398524056f7691f7787c154a5b7f6f1b7b2 AUX caffe2-2.1.2-fix-openmp-link.patch 483 BLAKE2B dfe9ae0978b71449486d12f769c3c962001c138bc53b0605ed6f2215fe732356da90c5084488bc4d9f54c88aab2a426b8c935543f7ba0890783bb58da30a5cb4 SHA512 a9cee8ae2a8477a21e92bd4ef286af68efd90283593db7ed4641a826b19b4266bcab9c131c93be2b6c2f13d5cd545c766612791cf2e23cff9a731a03f3ad7648 AUX caffe2-2.1.2-fix-rpath.patch 510 BLAKE2B 6585e8089acc6e7fe69713ad04a64263ab2e9d2d5cf6d3a844dbbc917383e4b7a0688ebdfb3142e3c42108c8c14a6435f48ae4ce3adee8db338286d60ed7b503 SHA512 ede770559b487fff736aded0cf4b077d8308c2b85c5cd139150b04bcb8b72a0d78af6a2b74fbb153b75acff71df0832d8f139231d1c18558f5b5479af348e6a0 +AUX caffe2-2.2.1-gentoo.patch 6765 BLAKE2B 924338e5823825d18220c33e9168f96b5987350cf47ad26824c660dfe953f54c415a4a709d9d7bab6901687f41f8477c0615ab76773e0e689ecf91e9a7f2ef64 SHA512 e9a20bc83a1c0349927205fc3675b1ad832cb86acca3a8d2f68a3dd33f2c1fa39fb53616b603023dce217d0a29fef34e6abb6315201690a8568d2419bcb9d895 DIST pytorch-1.13.1.tar.gz 108279745 BLAKE2B 75de03b74dfdaf8d8fb5ea743fcc0c1b0e408a714ad4160c487921220a7b1755e5fa6e587e6bbc8c9f34dd75e096d2e6dd69c80d24821835fff6c833314434d3 SHA512 f16f89d027efade11d057245cad5b69a390e88b458398310ae30de2dbff7c8fd7f1165be7b8da7ea989c81ac3f5a66c5cb9050610e441a97c83fb8aa28c0bd62 DIST pytorch-2.0.1.tar.gz 111335778 BLAKE2B 7a10cc2b2d5e2422aef7e060a0c3a62ca5c7460c6e0b9becade9b98939501975c74ed5a175a653731f43ca824d2c9bd31f41d1f633c2b139779ab23d5331e9ce SHA512 
2309a22b3be3ccdb36d8d9781a59a7bdcc2fdb8d95ada205702ec77862480f0cbb12cd5d6b8cd3114d01a6e33b7743d0fe9de93debf37138ca5c14403cdb0c43 DIST pytorch-2.1.2.tar.gz 116316469 BLAKE2B c5a55ee264bc3477d3556ba6376b5591117e992e56e0dd0c9ba93d12526e2727f7840f6f1e0730a38223b6492c9556840c4ebf22ffd220e97225c2abff303747 SHA512 a8961d78ad785b13c959a0612563a60e0de17a7c8bb9822ddea9a24072796354d07e81c47b6cc8761b21a6448845b088cf80e1661d9e889b0ed5474d3dc76756 +DIST pytorch-2.2.1.tar.gz 116370903 BLAKE2B 7d08e80f91bad76fba1751c30a34bebfe7145058b7758c0d47112702263a80666f70687a8860744725c6aa995e854f766a5bfa4644c23e5635e7e08c8d63a6e9 SHA512 f19ebcf59d183c3348946ba7cfcab2bc4ca93785863b8edc39dba5772083a7b0425ccb4f92a8df4dc0d18246c75e8ff812993161467fbf9dc48d7fb28a1e26f1 EBUILD caffe2-1.13.1-r6.ebuild 5244 BLAKE2B d7f5f16e1f1122604a6df64f16c62552fb8b4b0de67bd231036b4835a5a71c58da02f0e1df64f3bd22c2a282a150d7f5a803c87cdf792e3e97ec8f518e055191 SHA512 58b1a09e5e3814d9475d4fe0e46aa837477843e09ed1b0c803c2ac3190e5c819d4216e33e3003652194e90a9f3f35146657eb25f30a4419bf16b2067e5a4b027 EBUILD caffe2-2.0.1-r5.ebuild 5868 BLAKE2B c17450d01ff68d42188c9da9e9c7a6d6469fe5c8b72c91e4ea4456eb4fe9d08cf30619fbb06f3dae21add641329f3ce8ced24bca93eb05900340639c042a1cdb SHA512 d32fa4c194c51efb76b143f22f783efa7a1415902b03ddea1d3f145d632455ebfe672a1b05b1203f2ba752f3331831455ae631a66bb409f45d6c695c68f39c3b EBUILD caffe2-2.1.2-r6.ebuild 6674 BLAKE2B f12792b8f93c1ef0fccd1d8c2f46d5ce451729051d6f1cc6a04c2bd94324c5eb507e3210597ddad18be186bd42c21bcab0d7e4db2180a7d7f1b6334d94a518ab SHA512 b2f32fd716640a44b582b9c3a7d819c3da78723d93e5b8962cb32d38124ec078602066a08aaa0d0e632ddae1387d9224329fedc528d180f7d5326a54cce54e4f +EBUILD caffe2-2.2.1.ebuild 6306 BLAKE2B c7d765797d3fe09d0d8bee96c94477ca60e3074ec5c9e54b64bcef9c2bb536ba6491755f300b7cf7bfcf21d0c3819f5041638392abf80441780e779fb7e948c7 SHA512 2e293d51c576006b0d4a6ced68a513f2d3eecf0455656096869e1d7e048895575f496b314f10087e32b7680a53a22355511fcdf62e18eeaed207c3a960568236 MISC metadata.xml 
1161 BLAKE2B 77145d6b17a38da3fc791b85ec6d1d8a4faa5f08485f7b8d7918f301342c9d95b9b9db9147334788ffa5137526365d0161a5e1420eabafb2058e1d85a5fa52bb SHA512 44fa18ac5e1abcfb021e8fc48db1bb9c0f61bc115484ae6f293f38c48d1f42704524490c8e3977eec8dccb728837e1fcb3ce2e892986e55044af3a15e82a61e9 diff --git a/sci-libs/caffe2/caffe2-2.2.1.ebuild b/sci-libs/caffe2/caffe2-2.2.1.ebuild new file mode 100644 index 000000000000..68328fb71624 --- /dev/null +++ b/sci-libs/caffe2/caffe2-2.2.1.ebuild @@ -0,0 +1,230 @@ +# Copyright 2022-2024 Gentoo Authors +# Distributed under the terms of the GNU General Public License v2 + +EAPI=8 + +PYTHON_COMPAT=( python3_{9..12} ) +inherit python-single-r1 cmake cuda flag-o-matic prefix + +MYPN=pytorch +MYP=${MYPN}-${PV} + +DESCRIPTION="A deep learning framework" +HOMEPAGE="https://pytorch.org/" +SRC_URI="https://github.com/pytorch/${MYPN}/archive/refs/tags/v${PV}.tar.gz + -> ${MYP}.tar.gz" + +LICENSE="BSD" +SLOT="0" +KEYWORDS="~amd64" +IUSE="cuda distributed fbgemm ffmpeg gloo mkl mpi nnpack +numpy onednn openblas opencl opencv openmp qnnpack xnnpack" +RESTRICT="test" +REQUIRED_USE=" + ${PYTHON_REQUIRED_USE} + ffmpeg? ( opencv ) + mpi? ( distributed ) + gloo? ( distributed ) +" # ?? ( cuda rocm ) + +# CUDA 12 not supported yet: https://github.com/pytorch/pytorch/issues/91122 +RDEPEND=" + ${PYTHON_DEPS} + dev-cpp/gflags:= + >=dev-cpp/glog-0.5.0 + dev-libs/cpuinfo + dev-libs/libfmt + dev-libs/protobuf:= + dev-libs/pthreadpool + dev-libs/sleef + virtual/lapack + >=sci-libs/onnx-1.12.0 + =dev-libs/cudnn-frontend-0.9.2:0/8 + dev-util/nvidia-cuda-toolkit:=[profiler] + ) + fbgemm? ( >=dev-libs/FBGEMM-2023.11.02 ) + ffmpeg? ( media-video/ffmpeg:= ) + gloo? ( sci-libs/gloo[cuda?] ) + mpi? ( virtual/mpi ) + nnpack? ( sci-libs/NNPACK ) + numpy? ( $(python_gen_cond_dep ' + dev-python/numpy[${PYTHON_USEDEP}] + ') ) + onednn? ( dev-libs/oneDNN ) + opencl? ( virtual/opencl ) + opencv? ( media-libs/opencv:= ) + qnnpack? ( sci-libs/QNNPACK ) + distributed? 
( sci-libs/tensorpipe[cuda?] ) + xnnpack? ( >=sci-libs/XNNPACK-2022.12.22 ) + mkl? ( sci-libs/mkl ) + openblas? ( sci-libs/openblas ) +" +DEPEND=" + ${RDEPEND} + cuda? ( >=dev-libs/cutlass-3.1.0 ) + onednn? ( sci-libs/ideep ) + dev-libs/psimd + dev-libs/FP16 + dev-libs/FXdiv + dev-libs/pocketfft + dev-libs/flatbuffers + >=sci-libs/kineto-0.4.0_p20231031 + $(python_gen_cond_dep ' + dev-python/pyyaml[${PYTHON_USEDEP}] + dev-python/pybind11[${PYTHON_USEDEP}] + ') +" + +S="${WORKDIR}"/${MYP} + +PATCHES=( + "${FILESDIR}"/${P}-gentoo.patch + "${FILESDIR}"/${PN}-1.13.0-install-dirs.patch + "${FILESDIR}"/${PN}-1.12.0-glog-0.6.0.patch + "${FILESDIR}"/${PN}-1.13.1-tensorpipe.patch + "${FILESDIR}"/${PN}-2.0.0-gcc13.patch + "${FILESDIR}"/${PN}-2.0.0-cudnn_include_fix.patch + "${FILESDIR}"/${PN}-2.1.2-fix-rpath.patch + "${FILESDIR}"/${PN}-2.1.2-fix-openmp-link.patch +) + +src_prepare() { + filter-lto #bug 862672 + sed -i \ + -e "/third_party\/gloo/d" \ + cmake/Dependencies.cmake \ + || die + cmake_src_prepare + pushd torch/csrc/jit/serialization || die + flatc --cpp --gen-mutable --scoped-enums mobile_bytecode.fbs || die + popd + # prefixify the hardcoded paths, after all patches are applied + hprefixify \ + aten/CMakeLists.txt \ + caffe2/CMakeLists.txt \ + cmake/Metal.cmake \ + cmake/Modules/*.cmake \ + cmake/Modules_CUDA_fix/FindCUDNN.cmake \ + cmake/Modules_CUDA_fix/upstream/FindCUDA/make2cmake.cmake \ + cmake/Modules_CUDA_fix/upstream/FindPackageHandleStandardArgs.cmake \ + cmake/public/LoadHIP.cmake \ + cmake/public/cuda.cmake \ + cmake/Dependencies.cmake \ + torch/CMakeLists.txt \ + CMakeLists.txt +} + +src_configure() { + if use cuda && [[ -z ${TORCH_CUDA_ARCH_LIST} ]]; then + ewarn "WARNING: caffe2 is being built with its default CUDA compute capabilities: 3.5 and 7.0." + ewarn "These may not be optimal for your GPU." 
+ ewarn "" + ewarn "To configure caffe2 with the CUDA compute capability that is optimal for your GPU," + ewarn "set TORCH_CUDA_ARCH_LIST in your make.conf, and re-emerge caffe2." + ewarn "For example, to use CUDA capability 7.5 & 3.5, add: TORCH_CUDA_ARCH_LIST=7.5 3.5" + ewarn "For a Maxwell model GPU, an example value would be: TORCH_CUDA_ARCH_LIST=Maxwell" + ewarn "" + ewarn "You can look up your GPU's CUDA compute capability at https://developer.nvidia.com/cuda-gpus" + ewarn "or by running /opt/cuda/extras/demo_suite/deviceQuery | grep 'CUDA Capability'" + fi + + local mycmakeargs=( + -DBUILD_CUSTOM_PROTOBUF=OFF + -DBUILD_SHARED_LIBS=ON + + -DUSE_CCACHE=OFF + -DUSE_CUDA=$(usex cuda) + -DUSE_CUDNN=$(usex cuda) + -DTORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST:-3.5 7.0}" + -DBUILD_NVFUSER=$(usex cuda) + -DUSE_DISTRIBUTED=$(usex distributed) + -DUSE_MPI=$(usex mpi) + -DUSE_FAKELOWP=OFF + -DUSE_FBGEMM=$(usex fbgemm) + -DUSE_FFMPEG=$(usex ffmpeg) + -DUSE_GFLAGS=ON + -DUSE_GLOG=ON + -DUSE_GLOO=$(usex gloo) + -DUSE_KINETO=OFF # TODO + -DUSE_LEVELDB=OFF + -DUSE_MAGMA=OFF # TODO: In GURU as sci-libs/magma + -DUSE_MKLDNN=$(usex onednn) + -DUSE_NCCL=OFF # TODO: NVIDIA Collective Communication Library + -DUSE_NNPACK=$(usex nnpack) + -DUSE_QNNPACK=$(usex qnnpack) + -DUSE_XNNPACK=$(usex xnnpack) + -DUSE_SYSTEM_XNNPACK=$(usex xnnpack) + -DUSE_TENSORPIPE=$(usex distributed) + -DUSE_PYTORCH_QNNPACK=OFF + -DUSE_NUMPY=$(usex numpy) + -DUSE_OPENCL=$(usex opencl) + -DUSE_OPENCV=$(usex opencv) + -DUSE_OPENMP=$(usex openmp) + -DUSE_ROCM=OFF # TODO + -DUSE_SYSTEM_CPUINFO=ON + -DUSE_SYSTEM_PYBIND11=ON + -DUSE_UCC=OFF + -DUSE_VALGRIND=OFF + -DPYBIND11_PYTHON_VERSION="${EPYTHON#python}" + -DPYTHON_EXECUTABLE="${PYTHON}" + -DUSE_ITT=OFF + -DUSE_SYSTEM_PTHREADPOOL=ON + -DUSE_SYSTEM_FXDIV=ON + -DUSE_SYSTEM_FP16=ON + -DUSE_SYSTEM_GLOO=ON + -DUSE_SYSTEM_ONNX=ON + -DUSE_SYSTEM_SLEEF=ON + -DUSE_METAL=OFF + + -Wno-dev + -DTORCH_INSTALL_LIB_DIR="${EPREFIX}"/usr/$(get_libdir) + 
-DLIBSHM_INSTALL_LIB_SUBDIR="${EPREFIX}"/usr/$(get_libdir) + ) + + if use mkl; then + mycmakeargs+=(-DBLAS=MKL) + elif use openblas; then + mycmakeargs+=(-DBLAS=OpenBLAS) + else + mycmakeargs+=(-DBLAS=Generic -DBLAS_LIBRARIES=) + fi + + if use cuda; then + addpredict "/dev/nvidiactl" # bug 867706 + addpredict "/dev/char" + + mycmakeargs+=( + -DCMAKE_CUDA_FLAGS="$(cuda_gccdir -f | tr -d \")" + ) + fi + + if use onednn; then + mycmakeargs+=( + -DUSE_MKLDNN=ON + -DMKLDNN_FOUND=ON + -DMKLDNN_LIBRARIES=dnnl + -DMKLDNN_INCLUDE_DIR="${ESYSROOT}/usr/include/oneapi/dnnl" + ) + fi + + cmake_src_configure +} + +src_install() { + cmake_src_install + + insinto "/var/lib/${PN}" + doins "${BUILD_DIR}"/CMakeCache.txt + + rm -rf python + mkdir -p python/torch/include || die + mv "${ED}"/usr/lib/python*/site-packages/caffe2 python/ || die + cp torch/version.py python/torch/ || die + python_domodule python/caffe2 + python_domodule python/torch + ln -s ../../../../../include/torch \ + "${D}$(python_get_sitedir)"/torch/include/torch || die # bug 923269 +} diff --git a/sci-libs/caffe2/files/caffe2-2.2.1-gentoo.patch b/sci-libs/caffe2/files/caffe2-2.2.1-gentoo.patch new file mode 100644 index 000000000000..5472a2c41836 --- /dev/null +++ b/sci-libs/caffe2/files/caffe2-2.2.1-gentoo.patch @@ -0,0 +1,195 @@ +--- a/cmake/Dependencies.cmake ++++ b/cmake/Dependencies.cmake +@@ -474,7 +474,7 @@ + endif() + + # ---[ QNNPACK +-if(USE_QNNPACK) ++if(FALSE) + set(CAFFE2_THIRD_PARTY_ROOT "${PROJECT_SOURCE_DIR}/third_party") + + if(NOT DEFINED QNNPACK_SOURCE_DIR) +@@ -530,7 +530,7 @@ + endif() + + # ---[ Caffe2 Int8 operators (enabled by USE_QNNPACK) depend on gemmlowp and neon2sse headers +-if(USE_QNNPACK) ++if(FALSE) + set(CAFFE2_THIRD_PARTY_ROOT "${PROJECT_SOURCE_DIR}/third_party") + include_directories(SYSTEM "${CAFFE2_THIRD_PARTY_ROOT}/gemmlowp") + include_directories(SYSTEM "${CAFFE2_THIRD_PARTY_ROOT}/neon2sse") +@@ -780,7 +780,7 @@ + endif() + + # ---[ FBGEMM +-if(USE_FBGEMM) ++if(FALSE) + 
set(CAFFE2_THIRD_PARTY_ROOT "${PROJECT_SOURCE_DIR}/third_party") + if(NOT DEFINED FBGEMM_SOURCE_DIR) + set(FBGEMM_SOURCE_DIR "${CAFFE2_THIRD_PARTY_ROOT}/fbgemm" CACHE STRING "FBGEMM source directory") +@@ -828,6 +828,7 @@ + endif() + + if(USE_FBGEMM) ++ list(APPEND Caffe2_DEPENDENCY_LIBS fbgemm) + caffe2_update_option(USE_FBGEMM ON) + else() + caffe2_update_option(USE_FBGEMM OFF) +@@ -1529,7 +1530,6 @@ + set_target_properties(onnx_proto PROPERTIES CXX_STANDARD 17) + endif() + endif() +- add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/../third_party/foxi EXCLUDE_FROM_ALL) + + add_definitions(-DONNX_NAMESPACE=${ONNX_NAMESPACE}) + if(NOT USE_SYSTEM_ONNX) +@@ -1796,7 +1796,6 @@ + # + set(TEMP_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}) + set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build shared libs" FORCE) +-add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt) + + # Disable compiler feature checks for `fmt`. + # +@@ -1805,9 +1804,7 @@ + # CMAKE_CXX_FLAGS in ways that break feature checks. Since we already know + # `fmt` is compatible with a superset of the compilers that PyTorch is, it + # shouldn't be too bad to just disable the checks. 
+-set_target_properties(fmt-header-only PROPERTIES INTERFACE_COMPILE_FEATURES "") + +-list(APPEND Caffe2_DEPENDENCY_LIBS fmt::fmt-header-only) + set(BUILD_SHARED_LIBS ${TEMP_BUILD_SHARED_LIBS} CACHE BOOL "Build shared libs" FORCE) + + # ---[ Kineto +--- a/c10/CMakeLists.txt ++++ b/c10/CMakeLists.txt +@@ -89,7 +89,7 @@ + if(C10_USE_GLOG) + target_link_libraries(c10 PUBLIC glog::glog) + endif() +-target_link_libraries(c10 PRIVATE fmt::fmt-header-only) ++target_link_libraries(c10 PRIVATE fmt) + + if(C10_USE_NUMA) + target_include_directories(c10 PRIVATE ${Numa_INCLUDE_DIR}) +--- a/torch/CMakeLists.txt ++++ b/torch/CMakeLists.txt +@@ -59,15 +59,9 @@ + ${CMAKE_BINARY_DIR} + ${CMAKE_BINARY_DIR}/aten/src + ${CMAKE_BINARY_DIR}/caffe2/aten/src +- ${CMAKE_BINARY_DIR}/third_party +- ${CMAKE_BINARY_DIR}/third_party/onnx + +- ${TORCH_ROOT}/third_party/valgrind-headers + +- ${TORCH_ROOT}/third_party/gloo +- ${TORCH_ROOT}/third_party/onnx +- ${TORCH_ROOT}/third_party/flatbuffers/include +- ${TORCH_ROOT}/third_party/kineto/libkineto/include ++ /usr/include/kineto + + ${TORCH_SRC_DIR}/csrc + ${TORCH_SRC_DIR}/csrc/api/include +@@ -80,7 +74,6 @@ + python::python + pybind::pybind11 + shm +- fmt::fmt-header-only + ATEN_CPU_FILES_GEN_LIB) + + if(USE_ASAN AND TARGET Sanitizer::address) +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -835,12 +835,11 @@ + # Re-include to override append_cxx_flag_if_supported from third_party/FBGEMM + include(cmake/public/utils.cmake) + if(NOT MSVC) +- string(APPEND CMAKE_CXX_FLAGS " -O2 -fPIC") ++ string(APPEND CMAKE_CXX_FLAGS " -O2") + # Eigen fails to build with some versions, so convert this to a warning + # Details at http://eigen.tuxfamily.org/bz/show_bug.cgi?id=1459 + string(APPEND CMAKE_CXX_FLAGS " -Wall") + string(APPEND CMAKE_CXX_FLAGS " -Wextra") +- append_cxx_flag_if_supported("-Werror=return-type" CMAKE_CXX_FLAGS) + append_cxx_flag_if_supported("-Werror=non-virtual-dtor" CMAKE_CXX_FLAGS) + 
append_cxx_flag_if_supported("-Werror=braced-scalar-init" CMAKE_CXX_FLAGS) + append_cxx_flag_if_supported("-Werror=range-loop-construct" CMAKE_CXX_FLAGS) +@@ -930,7 +930,6 @@ + string(APPEND CMAKE_LINKER_FLAGS_DEBUG " -fno-omit-frame-pointer -O0") + append_cxx_flag_if_supported("-fno-math-errno" CMAKE_CXX_FLAGS) + append_cxx_flag_if_supported("-fno-trapping-math" CMAKE_CXX_FLAGS) +- append_cxx_flag_if_supported("-Werror=format" CMAKE_CXX_FLAGS) + else() + # skip unwanted includes from windows.h + add_compile_definitions(WIN32_LEAN_AND_MEAN) +--- a/cmake/public/utils.cmake ++++ b/cmake/public/utils.cmake +@@ -486,8 +486,6 @@ + endif() + + # Use -O2 for release builds (-O3 doesn't improve perf, and -Os results in perf regression) +- target_compile_options(${libname} PRIVATE +- $<$,$,$>>:-O2>) + + endfunction() + +--- a/cmake/Codegen.cmake ++++ b/cmake/Codegen.cmake +@@ -57,7 +57,7 @@ + if(MSVC) + set(OPT_FLAG "/fp:strict ") + else(MSVC) +- set(OPT_FLAG "-O3 ") ++ set(OPT_FLAG " ") + if("${CMAKE_BUILD_TYPE}" MATCHES "Debug") + set(OPT_FLAG " ") + endif() +--- a/caffe2/CMakeLists.txt ++++ b/caffe2/CMakeLists.txt +@@ -107,7 +107,7 @@ + # Note: the folders that are being commented out have not been properly + # addressed yet. 
+ +-if(NOT MSVC AND USE_XNNPACK) ++if(FALSE) + if(NOT TARGET fxdiv) + set(FXDIV_BUILD_TESTS OFF CACHE BOOL "") + set(FXDIV_BUILD_BENCHMARKS OFF CACHE BOOL "") +@@ -1055,7 +1055,6 @@ + endif() + + if(NOT MSVC AND USE_XNNPACK) +- TARGET_LINK_LIBRARIES(torch_cpu PRIVATE fxdiv) + endif() + + # ========================================================== +@@ -1175,8 +1174,7 @@ + target_include_directories(torch_cpu PRIVATE + ${TORCH_ROOT}/third_party/miniz-2.1.0) + +-target_include_directories(torch_cpu PRIVATE +- ${TORCH_ROOT}/third_party/kineto/libkineto/include) ++target_include_directories(torch_cpu PRIVATE /usr/include/kineto) + + if(USE_KINETO) + target_include_directories(torch_cpu PRIVATE +--- a/cmake/External/nnpack.cmake ++++ b/cmake/External/nnpack.cmake +@@ -56,7 +56,7 @@ + set(PTHREADPOOL_SOURCE_DIR "${CAFFE2_THIRD_PARTY_ROOT}/pthreadpool" CACHE STRING "pthreadpool source directory") + set(GOOGLETEST_SOURCE_DIR "${CAFFE2_THIRD_PARTY_ROOT}/googletest" CACHE STRING "Google Test source directory") + +- if(NOT TARGET nnpack) ++ if(FALSE) + if(NOT USE_SYSTEM_PTHREADPOOL AND USE_INTERNAL_PTHREADPOOL_IMPL) + set(NNPACK_CUSTOM_THREADPOOL ON CACHE BOOL "") + endif() +--- a/functorch/CMakeLists.txt 2023-11-30 20:30:45.805209036 +0100 ++++ b/functorch/CMakeLists.txt 2023-11-30 20:31:13.284766157 +0100 +@@ -35,4 +35,4 @@ + if(NOT ${TORCH_PYTHON_LINK_FLAGS} STREQUAL "") + set_target_properties(${PROJECT_NAME} PROPERTIES LINK_FLAGS ${TORCH_PYTHON_LINK_FLAGS}) + endif() +-install(TARGETS ${PROJECT_NAME} DESTINATION "${CMAKE_CURRENT_SOURCE_DIR}") ++install(TARGETS ${PROJECT_NAME} DESTINATION "${CMAKE_INSTALL_LIBDIR}") diff --git a/sci-libs/composable-kernel/Manifest b/sci-libs/composable-kernel/Manifest new file mode 100644 index 000000000000..316f4e33f9d2 --- /dev/null +++ b/sci-libs/composable-kernel/Manifest @@ -0,0 +1,5 @@ +AUX composable-kernel-5.7.1-enable-test-examples.patch 507 BLAKE2B 
aae585262ad83f2a4e4f07e00e79a99d8cddae9c590e503f8ea1d3ffef84b3d829e61526a8c9b7977a6d604b5310cce521fb179155aba2ffc620c45db76c51cc SHA512 447ee2ec28442d610f185af4d6733d86d183a8f451fca00eafb0d6950d0a65f5d5396a8edcb4a3a1dd4733a6524ba4b9da9ca08b1bcdbd7ecaf7600bf0b14c60 +DIST composable-kernel-5.7.1.tar.gz 2158402 BLAKE2B 660c205c2a99cd17f29a9077e9e7b8b88e41f42ab40d92d0a235e7a60683a04234cce6b2eb6110265c2f461dd132c40151b2521b7d64df4cebb7f27094d8b2b0 SHA512 3931c5dbcab59513541103ebf7446311a1e318421a9e12aeafa7a98cf0717a75e1519140e6b2fbcaf4cb90829a11f03a724ff3ccc6636139c61667ac82558c55 +DIST gtest-1.14.0_p20220421.tar.gz 849107 BLAKE2B e9c32d9c1d98959583b696430a6be411f5d69e3db96669b16cffc0f1dcad42512391c0a4733f6e829c82953ff33fa243ee782f3cf6e86436f0d290f45d1a3db7 SHA512 967aac7d85da0e216fe13b17d10f894a31d763d9b88201bac7d3dfc37600552b5472d30a166a6ef27f2778677e73fd3e43d082695a48f47824262eb9cd4fcd2f +EBUILD composable-kernel-5.7.1-r1.ebuild 1786 BLAKE2B ca0f01b34ce48f190a57b9e3b0d788f4bd8422ecae782bb7e9d9e21886bdba0dac8776f1183e57b96e9ef824cde482476746d30b1d5cba5fe93eb7cea9453ce9 SHA512 11b806f90b2a4a94d2535a86cd490a992e39ded8aa648c6b327cd407338a326375850935515338b7fd15946799118821e0d40b0c024cdaaac95cd87bdad1c560 +MISC metadata.xml 473 BLAKE2B fdeace65d8ada21cb0b0fd3a7e1c0c48338c10b51cdad675ffb33e4bd8b9da4ef83ffee85606ef436b2f0315ed323266fa28c70ac5553952ee800fe0450fb066 SHA512 38c79eb08baedccfa7e7a46ef138520d7a939379c4120883d1f8b1187624ca78d9c8dfcf5759867a0b78253b4724a4da944f7384ec27bc2f9846198c1386af9d diff --git a/sci-libs/composable-kernel/composable-kernel-5.7.1-r1.ebuild b/sci-libs/composable-kernel/composable-kernel-5.7.1-r1.ebuild new file mode 100644 index 000000000000..fbaa9356f722 --- /dev/null +++ b/sci-libs/composable-kernel/composable-kernel-5.7.1-r1.ebuild @@ -0,0 +1,83 @@ +# Copyright 1999-2024 Gentoo Authors +# Distributed under the terms of the GNU General Public License v2 + +EAPI=8 + +ROCM_VERSION=${PV} + +inherit cmake flag-o-matic llvm rocm + 
+GTEST_COMMIT="b85864c64758dec007208e56af933fc3f52044ee" +GTEST_FILE="gtest-1.14.0_p20220421.tar.gz" + +LLVM_MAX_SLOT=17 + +DESCRIPTION="High Performance Composable Kernel for AMD GPUs" +HOMEPAGE="https://github.com/ROCmSoftwarePlatform/composable_kernel" +SRC_URI="https://github.com/ROCmSoftwarePlatform/composable_kernel/archive/rocm-${PV}.tar.gz -> ${P}.tar.gz + test? ( https://github.com/google/googletest/archive/${GTEST_COMMIT}.tar.gz -> ${GTEST_FILE} )" + +LICENSE="MIT" +KEYWORDS="~amd64" +SLOT="0/$(ver_cut 1-2)" + +IUSE="debug test" +REQUIRED_USE="${ROCM_REQUIRED_USE}" +RESTRICT="!test? ( test )" + +RDEPEND=" + dev-util/hip + >=dev-db/sqlite-3.17 + sci-libs/rocBLAS:${SLOT}[${ROCM_USEDEP}] + >=dev-libs/boost-1.72 + dev-cpp/nlohmann_json + dev-cpp/frugally-deep +" + +DEPEND="${RDEPEND}" + +BDEPEND="dev-libs/half:0/1 + dev-build/rocm-cmake +" + +S="${WORKDIR}/composable_kernel-rocm-${PV}" + +PATCHES=( + "${FILESDIR}/${PN}-5.7.1-enable-test-examples.patch" +) + +src_prepare() { + sed -e '/-Werror/d' -i cmake/EnableCompilerWarnings.cmake || die + cmake_src_prepare +} + +src_configure() { + if ! 
use debug; then + append-cflags "-DNDEBUG" + append-cxxflags "-DNDEBUG" + CMAKE_BUILD_TYPE="Release" + else + CMAKE_BUILD_TYPE="Debug" + fi + + local mycmakeargs=( + -DCMAKE_SKIP_RPATH=ON + -DBUILD_DEV=OFF + -DAMDGPU_TARGETS="$(get_amdgpu_flags)" + -DCMAKE_INSTALL_PREFIX="${EPREFIX}/usr" + -DBUILD_TESTS=$(usex test ON OFF) + ) + + if use test; then + mycmakeargs+=( + -DGOOGLETEST_DIR="${WORKDIR}/googletest-${GTEST_COMMIT}" + ) + fi + + CXX=hipcc cmake_src_configure +} + +src_test() { + check_amdgpu + LD_LIBRARY_PATH="${BUILD_DIR}"/lib cmake_src_test -j1 +} diff --git a/sci-libs/composable-kernel/files/composable-kernel-5.7.1-enable-test-examples.patch b/sci-libs/composable-kernel/files/composable-kernel-5.7.1-enable-test-examples.patch new file mode 100644 index 000000000000..dd93b8af743a --- /dev/null +++ b/sci-libs/composable-kernel/files/composable-kernel-5.7.1-enable-test-examples.patch @@ -0,0 +1,19 @@ +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -280,8 +280,14 @@ rocm_package_setup_component(profiler + ) + + add_subdirectory(library) +-add_subdirectory(example) +-add_subdirectory(test) ++option(BUILD_EXAMPLES "Build examples" OFF) ++if(BUILD_EXAMPLES) ++ add_subdirectory(example) ++endif() ++option(BUILD_TESTS "Build binaries for tests" OFF) ++if(BUILD_TESTS) ++ add_subdirectory(test) ++endif() + add_subdirectory(profiler) + + #Create an interface target for the include only files and call it "composablekernels" diff --git a/sci-libs/composable-kernel/metadata.xml b/sci-libs/composable-kernel/metadata.xml new file mode 100644 index 000000000000..9236dd2846d7 --- /dev/null +++ b/sci-libs/composable-kernel/metadata.xml @@ -0,0 +1,15 @@ + + + + + lockalsash@gmail.com + Sv. 
Lockal + + + sci@gentoo.org + Science Project + + + ROCmSoftwarePlatform/composable_kernel + + diff --git a/sci-libs/hipCUB/Manifest b/sci-libs/hipCUB/Manifest index ce1e7e85e365..5ddc70706c77 100644 --- a/sci-libs/hipCUB/Manifest +++ b/sci-libs/hipCUB/Manifest @@ -2,5 +2,5 @@ AUX hipCUB-4.3.0-add-memory-header.patch 421 BLAKE2B cd05a027dbf75098fd199783c0a DIST hipCUB-5.1.3.tar.gz 278021 BLAKE2B f30e567370fea80ef63a455c2c27b3f4ff241e7e405688fa4dc33d1e0ce3dffb016a1a1523b20613a7963bff061758eac0c0d110243a9a6cedbc100bb087d7d4 SHA512 dd3ca341b78cf6985ba723feeeb560c0503ab283b63067e9a5e68d12ad42f752a7f56fd60f0f0c26c2853538853e09a102cfc6006c18c31c3162fa7b536ffb9f DIST hipCUB-5.7.1.tar.gz 322980 BLAKE2B a9b2a69b47661532858f18e7296ef37be41da814bb4886f7b8b444829e6ad1374d1c3117b57e7599095253ed56ea2403c895bb820dbde9cec77c28a041af3898 SHA512 7cf7dc819a3d8bac2cf70aa077616e65d785d76bae3ad3991db7b940a061f694ac82b1e4219aa16476351f1329e3f0d09a818b0ff0b9c5ad261937879c21dca8 EBUILD hipCUB-5.1.3.ebuild 1930 BLAKE2B 9a686ada92902f9ee2fed32ac68e5599a38fd25ab639a22316293efe868701399d6d26c401d9c09b328139d94fe4fccbe8a33e6dd3eb675b7b9704c906af3ce9 SHA512 91a0e1284bb43502525120d031586b12238a71c28583135848b824e555c5ed84ab7d2258eb38945090a00def1ba060ae2ba8aefa7c2fe5cd35ae38808b4df94d -EBUILD hipCUB-5.7.1.ebuild 2024 BLAKE2B e00a79491c2676e34b8be32fbaa1b57607f30f6a90d1a0afdab7421df122b25fa33a875255edbadec4c65a930430ce1ed1df52203fc7d95d84cee8c487889a40 SHA512 f082d6f3cda7dca7715d78dad19d18598aeb5c9951aca774bc5964ccae699d921750e90af8bac60b1252a63a56b65fce7ad564414daa4b892e15067c2eac2c55 +EBUILD hipCUB-5.7.1-r1.ebuild 1303 BLAKE2B b5702011d5ef9ffc482cec9a74bf6b05bc952d8d071b38d7dfe8ae5aa48d8944ec248d23eef9bbdf25e25085cb99e5b578a291b206fb6dfb97f2ca257245b515 SHA512 97e716391c5f75d05396b46cf05f73130eefc12566c07f550d25ed38fb764a02d3bee0b77ab0cdb849261bce8dba0fe66d01e9d261a110e4a06dbde1d201ffe5 MISC metadata.xml 625 BLAKE2B 
7f927357a005110b37accb6841e4248e22e8080d0e0d6eb28e50e96c84807f663faf2f85dd222e5f5af448211cc05004d371a6d57ac6f17393609e76980bda81 SHA512 7d16cebc8c7df7dcc5d63e02e88e9d16d2321485de688621fd8628b8473d5ae614d63b0578363ee3b47f5e0aa7f45da9a00950b6a09a8cc9c6e3c77ae856c798 diff --git a/sci-libs/hipCUB/hipCUB-5.7.1-r1.ebuild b/sci-libs/hipCUB/hipCUB-5.7.1-r1.ebuild new file mode 100644 index 000000000000..8ff00b145b09 --- /dev/null +++ b/sci-libs/hipCUB/hipCUB-5.7.1-r1.ebuild @@ -0,0 +1,55 @@ +# Copyright 1999-2024 Gentoo Authors +# Distributed under the terms of the GNU General Public License v2 + +EAPI=8 + +ROCM_VERSION=${PV} + +inherit cmake rocm + +DESCRIPTION="Wrapper of rocPRIM or CUB for GPU parallel primitives" +HOMEPAGE="https://github.com/ROCmSoftwarePlatform/hipCUB" +SRC_URI="https://github.com/ROCmSoftwarePlatform/hipCUB/archive/rocm-${PV}.tar.gz -> hipCUB-${PV}.tar.gz" + +LICENSE="BSD" +KEYWORDS="~amd64" +SLOT="0/$(ver_cut 1-2)" +IUSE="benchmark test" +REQUIRED_USE="${ROCM_REQUIRED_USE}" +RESTRICT="!test? ( test )" + +RDEPEND="dev-util/hip + sci-libs/rocPRIM:${SLOT}[${ROCM_USEDEP}] + benchmark? ( dev-cpp/benchmark ) + test? 
( dev-cpp/gtest ) +" +DEPEND="${RDEPEND}" + +S="${WORKDIR}/hipCUB-rocm-${PV}" + +src_prepare() { + sed -e "s:set(ROCM_INSTALL_LIBDIR lib):set(ROCM_INSTALL_LIBDIR $(get_libdir)):" \ + -i cmake/ROCMExportTargetsHeaderOnly.cmake || die + + cmake_src_prepare +} + +src_configure() { + addpredict /dev/kfd + addpredict /dev/dri/ + + local mycmakeargs=( + -DAMDGPU_TARGETS="$(get_amdgpu_flags)" + -DBUILD_TEST=$(usex test ON OFF) + -DBUILD_BENCHMARK=$(usex benchmark ON OFF) + -DBUILD_FILE_REORG_BACKWARD_COMPATIBILITY=OFF + ) + + CXX=hipcc cmake_src_configure +} + +src_test() { + check_amdgpu + # uses HMM to fit tests to default <512M iGPU VRAM + MAKEOPTS="-j1" HIPCUB_USE_HMM="1" cmake_src_test +} diff --git a/sci-libs/hipCUB/hipCUB-5.7.1.ebuild b/sci-libs/hipCUB/hipCUB-5.7.1.ebuild deleted file mode 100644 index 0674d33b6ed5..000000000000 --- a/sci-libs/hipCUB/hipCUB-5.7.1.ebuild +++ /dev/null @@ -1,71 +0,0 @@ -# Copyright 1999-2023 Gentoo Authors -# Distributed under the terms of the GNU General Public License v2 - -EAPI=8 - -ROCM_VERSION=${PV} - -inherit cmake rocm - -DESCRIPTION="Wrapper of rocPRIM or CUB for GPU parallel primitives" -HOMEPAGE="https://github.com/ROCmSoftwarePlatform/hipCUB" -SRC_URI="https://github.com/ROCmSoftwarePlatform/hipCUB/archive/rocm-${PV}.tar.gz -> hipCUB-${PV}.tar.gz" - -LICENSE="BSD" -KEYWORDS="~amd64" -SLOT="0/$(ver_cut 1-2)" -IUSE="benchmark test" -REQUIRED_USE="${ROCM_REQUIRED_USE}" -RESTRICT="!test? ( test )" - -RDEPEND="dev-util/hip - sci-libs/rocPRIM:${SLOT}[${ROCM_USEDEP}] - benchmark? ( dev-cpp/benchmark ) - test? ( dev-cpp/gtest ) -" -DEPEND="${RDEPEND}" - -S="${WORKDIR}/hipCUB-rocm-${PV}" - -# src_prepare() { -# sed -e "/PREFIX hipcub/d" \ -# -e "/DESTINATION/s:hipcub/include/:include/:" \ -# -e "/rocm_install_symlink_subdir(hipcub)/d" \ -# -e "/ + + + + sci@gentoo.org + Gentoo Science Project + + + xgreenlandforwyy@gmail.com + Yiyang Wu + + + lockalsash@gmail.com + Sv. 
Lockal + + + ROCmSoftwarePlatform/hipSOLVER + + diff --git a/sci-libs/miopen/Manifest b/sci-libs/miopen/Manifest index a881f0b2ea27..671f703e1bc3 100644 --- a/sci-libs/miopen/Manifest +++ b/sci-libs/miopen/Manifest @@ -8,6 +8,10 @@ AUX miopen-5.1.3-deprecate-clang-ocl.patch 2884 BLAKE2B 4f45f66d87dea51d9f87878f AUX miopen-5.1.3-gfx1031.patch 19268 BLAKE2B b3d10c57601fd5f39ae431d101687863de34b149ee08df25c987d18ef0bd89d8419a4a18659a22060093eba1cf97c2774e92abf8a38449a202e8770070b29dbf SHA512 44fb3445065087a3952ac143fba88addb4cc634dbd13e15d85d234c64fabba9cf7b83d8161a361e847375a2bc92efc86489e12cfa23ecce1b035d2ccb028db56 AUX miopen-5.1.3-include-array.patch 343 BLAKE2B 993a8f3e301f4c16088ca216b2d820b17ceb42eeda4550b4291a70e00592281f992bf184407d024a0f8b2207a26fef2d56c26ef95e7a452d3b97090e1335a404 SHA512 887ac13f25c1bfd524bb2a3be701bf688f002a95ef8218cb798e41feaeb0678dbbd2ffaf905c59070a125001093bf5ede1bb6a77fdfe6f7a4ff19cbccfdc6624 AUX miopen-5.1.3-no-strip.patch 608 BLAKE2B 3173d95219542a48c6dcd2b87e7157dcf483ff714c3d1c1c47aab8fc8af7b1fd4f6ac52b9d8e892cc25bff973597f4a05426fa4714b0764733b810867af59cfe SHA512 201357718203ee9a4062362541c483e069321b825708c6aa0e8892cc7722bcd520808797307e320df34c4fbf0003198f7a46717d5ff3819b1e2c3a5a3a045725 +AUX miopen-5.7.1-fix-miopendriver-gemm.patch 2818 BLAKE2B fd839ec500b297272f70cc1ee2fa87e74b8a70197cc72291c99c1372c9eeb7f2af33de8067fa1dc42d873bd96f93615074521c00f00b0676835c912ec0e2b73b SHA512 22c852082e0de5115acfa208c325cdb7870823dfe3b5984887c848b702a2fc5072c1d1944b218e27776f0e116d391c6039c3f8485ca65282e9699f3fc1885985 DIST MIOpen-5.1.3.tar.gz 88118329 BLAKE2B d24722ffc5f5dab6d6a1de2ce34193ad2f25c9a2562e38c52e010a29870f01d9ea1c56970ba0601a088c8286e97958ee95d0da27fc8082126dd2ebe5ccb36b70 SHA512 a14e28cfcb12e5061e0e7b999ef3e67fa0a0e897e31bc50e7288b8a23eb1791312e33d3b697021c2b654ccc065ae1b046c1cfd77ba8e04b0f3e87e9cc0626dcd +DIST MIOpen-5.7.1.tar.gz 100751593 BLAKE2B 
c5f847fe4374ab22737c281a65401125012328412d584fc09244b431ea6265d6d5028429115ee15fa8b04cbe0edd020e4e7ac8deb22561183ed76cb8c3d4d9d4 SHA512 3354b3b154f29a6337403abc5a71ec47c0b2558320c5a1b0cbfbbdb370c4fada2db12d4a19a312b5e30ca2e2302ee50ece3390603e84d132b2212a168e9523fa +DIST gtest-1.11.0_p20210611.tar.gz 887296 BLAKE2B 8f29b7028a6dd8190a113cd93398705b23b61d88bee38beaf9dcc0dfc8a463aed7fcd3719f6f1b131d4363aa57231629aaeffa108f6558efb58416cfface6d6e SHA512 cf9e7f3fd3e31ce6677eac355fb8bfe19c5b56a8ec3af8b9417d0904cdf5da92f99f7411a08131cc9fa4fc7d38e6a71fcfac993648e47b269a74a27de7607f7a EBUILD miopen-5.1.3-r1.ebuild 3131 BLAKE2B 70a9e8325d5611aaad84ebba69873c1177bad75b5a7c90ccee74dc26a3283ee555c0826203851b2ea1234837cd0ec26103c8891b5ba6fdca27a18659bcb59e4d SHA512 a61ebfdbe769d5f7d463c293df8b0f17f5a8c5e732c67d6c3be492fe0ffe433a5c9a30d5c735fb6661cdf1ea96a2edce76d7429936ff295b197522017717a36a -MISC metadata.xml 453 BLAKE2B 4c1e76cbc277e93a92a95cda405973cc72863882ab9a6729a07976e7815119e985688387c14fc5017fd6e85aaa64eaa72b900375c7f7428eb6bbb2569c5ecaa6 SHA512 0e42066a5ac8720d1b561bcf23269013d9cceff52a9ddd6a6e0af58c7d2628c335bb290e0dc6c262a0542e286f54fd07cb487dd908343fac5cfca07410df6e56 +EBUILD miopen-5.7.1-r1.ebuild 3287 BLAKE2B 926b187675d85525e3679a5a95f8c9451bd6e5758ca91d6e65b3341aa624d3c5dbfd2682960ca3d05dc241bde69a447eae8d8d582bb676dd45d7e8b9afc2c47f SHA512 823bfc7f122c9ed91ef3fca2af6ea027b163f1564a87f0d60603348888465770d0c300f97fa46194658dc14002cf78dbb55567c4b4880350f70336788bc1e886 +MISC metadata.xml 549 BLAKE2B 6bf2f670de2b8f61584d0a930dd65c4c36c251a68a232d0de5bdf3a732b242c7cbddff642ee37909fe54506c7f231c86adf53d71816f46469320e90925076fc1 SHA512 f5d23eacb988af92506012ccaa2d32a0b7e3d2c93b10bfce1eee6ac51ae113bc572a39e2386fd4816fe62ed563b0cb384d5480bd2bd44184cf77f5e53d8ded61 diff --git a/sci-libs/miopen/files/miopen-5.7.1-fix-miopendriver-gemm.patch b/sci-libs/miopen/files/miopen-5.7.1-fix-miopendriver-gemm.patch new file mode 100644 index 000000000000..859667f3da30 --- /dev/null 
+++ b/sci-libs/miopen/files/miopen-5.7.1-fix-miopendriver-gemm.patch @@ -0,0 +1,74 @@ +Fix uninitialized variable in MIOpenDriver gemm and restore gemmfp16 for testing +Upstream bug: https://github.com/ROCmSoftwarePlatform/MIOpen/issues/2505 +--- a/driver/driver.hpp ++++ b/driver/driver.hpp +@@ -141,7 +141,7 @@ inline void PadBufferSize(size_t& sz, int datatype_sz) + printf("Usage: ./driver *base_arg* *other_args*\n"); + printf("Supported Base Arguments: conv[fp16|int8|bfp16], CBAInfer[fp16], " + "pool[fp16], lrn[fp16], " +- "activ[fp16], softmax[fp16], bnorm[fp16], rnn[fp16], gemm, ctc, dropout[fp16], " ++ "activ[fp16], softmax[fp16], bnorm[fp16], rnn[fp16], gemm[fp16], ctc, dropout[fp16], " + "tensorop[fp16], reduce[fp16,fp64]\n"); + exit(0); // NOLINT (concurrency-mt-unsafe) + } +@@ -160,7 +160,7 @@ inline std::string ParseBaseArg(int argc, char* argv[]) + arg != "CBAInfer" && arg != "CBAInferfp16" && arg != "pool" && arg != "poolfp16" && + arg != "lrn" && arg != "lrnfp16" && arg != "activ" && arg != "activfp16" && + arg != "softmax" && arg != "softmaxfp16" && arg != "bnorm" && arg != "bnormfp16" && +- arg != "rnn" && arg != "rnnfp16" && arg != "gemm" /*&& arg != "gemmfp16"*/ && arg != "ctc" && ++ arg != "rnn" && arg != "rnnfp16" && arg != "gemm" && arg != "gemmfp16" && arg != "ctc" && + arg != "dropout" && arg != "dropoutfp16" && arg != "tensorop" && arg != "tensoropfp16" && + arg != "reduce" && arg != "reducefp16" && arg != "reducefp64" && arg != "--version") + { +--- a/driver/gemm_driver.hpp ++++ b/driver/gemm_driver.hpp +@@ -207,6 +207,19 @@ int GemmDriver::GetandSetData() + gemm_desc.strideB = gemm_desc.k * gemm_desc.n; + gemm_desc.strideC = gemm_desc.m * gemm_desc.n; + ++ if constexpr (std::is_same_v) ++ { ++ gemm_desc.dataType = miopenFloat; ++ } ++ else if constexpr (std::is_same_v) ++ { ++ gemm_desc.dataType = miopenHalf; ++ } ++ else ++ { ++ static_assert(!"unsupported type"); ++ } ++ + return (0); + } + +@@ -230,9 +243,9 @@ int 
GemmDriver::AllocateBuffersAndCopy() + a = std::vector(a_sz); + b = std::vector(b_sz); + #if GEMM_DRIVER_DEBUG +- c = std::vector(c_sz, 1.); ++ c = std::vector(c_sz, static_cast(1.)); + #else +- c = std::vector(c_sz, 0.); ++ c = std::vector(c_sz, static_cast(0.)); + #endif + chost = c; + +--- a/driver/main.cpp ++++ b/driver/main.cpp +@@ -125,11 +125,10 @@ int main(int argc, char* argv[]) + { + drv = new GemmDriver(); + } +-// TODO half is not supported in gemm +-// else if(base_arg == "gemmfp16") +-// { +-// drv = new GemmDriver(); +-// } ++ else if(base_arg == "gemmfp16") ++ { ++ drv = new GemmDriver(); ++ } + #endif + else if(base_arg == "bnorm") + { diff --git a/sci-libs/miopen/metadata.xml b/sci-libs/miopen/metadata.xml index e74d41fd607c..08c299887077 100644 --- a/sci-libs/miopen/metadata.xml +++ b/sci-libs/miopen/metadata.xml @@ -12,4 +12,7 @@ xgreenlandforwyy@gmail.com Yiyang Wu + + ROCmSoftwarePlatform/MIOpen + diff --git a/sci-libs/miopen/miopen-5.7.1-r1.ebuild b/sci-libs/miopen/miopen-5.7.1-r1.ebuild new file mode 100644 index 000000000000..93bcde8190ae --- /dev/null +++ b/sci-libs/miopen/miopen-5.7.1-r1.ebuild @@ -0,0 +1,122 @@ +# Copyright 1999-2024 Gentoo Authors +# Distributed under the terms of the GNU General Public License v2 + +EAPI=8 + +ROCM_VERSION=${PV} + +inherit cmake flag-o-matic llvm rocm + +GTEST_COMMIT="e2239ee6043f73722e7aa812a459f54a28552929" +GTEST_FILE="gtest-1.11.0_p20210611.tar.gz" + +LLVM_MAX_SLOT=17 + +DESCRIPTION="AMD's Machine Intelligence Library" +HOMEPAGE="https://github.com/ROCmSoftwarePlatform/MIOpen" + +SRC_URI="https://github.com/ROCmSoftwarePlatform/MIOpen/archive/rocm-${PV}.tar.gz -> MIOpen-${PV}.tar.gz + test? ( https://github.com/google/googletest/archive/${GTEST_COMMIT}.tar.gz -> ${GTEST_FILE} )" + +LICENSE="MIT" +KEYWORDS="~amd64" +SLOT="0/$(ver_cut 1-2)" + +IUSE="debug test" +RESTRICT="!test? 
( test )" + +RDEPEND=" + dev-util/hip + >=dev-db/sqlite-3.17 + sci-libs/rocBLAS:${SLOT}[${ROCM_USEDEP}] + sci-libs/composable-kernel:${SLOT}[${ROCM_USEDEP}] + >=dev-libs/boost-1.72 + dev-cpp/nlohmann_json + dev-cpp/frugally-deep +" + +DEPEND="${RDEPEND}" + +BDEPEND="dev-libs/half:0/1 + dev-build/rocm-cmake +" + +S="${WORKDIR}/MIOpen-rocm-${PV}" + +PATCHES=( + "${FILESDIR}/${PN}-4.2.0-disable-no-inline-boost.patch" + "${FILESDIR}/${PN}-4.2.0-gcc11-numeric_limits.patch" + "${FILESDIR}/${PN}-4.3.0-fix-interface-include-in-HIP_COMPILER_FLAGS.patch" + "${FILESDIR}/${PN}-4.3.0-enable-test.patch" + "${FILESDIR}/${PN}-5.1.3-no-strip.patch" + "${FILESDIR}/${PN}-5.1.3-include-array.patch" + "${FILESDIR}/${PN}-5.7.1-fix-miopendriver-gemm.patch" +) + +src_prepare() { + cmake_src_prepare + + sed -e "s:/opt/rocm/llvm:$(get_llvm_prefix ${LLVM_MAX_SLOT}) NO_DEFAULT_PATH:" \ + -e "s:/opt/rocm/hip:$(hipconfig -p) NO_DEFAULT_PATH:" \ + -e '/set( MIOPEN_INSTALL_DIR/s:miopen:${CMAKE_INSTALL_PREFIX}:' \ + -e '/MIOPEN_TIDY_ERRORS ALL/d' \ + -e 's:find_program(UNZIPPER lbunzip2 bunzip2):find_program(UNZIPPER NAMES lbunzip2 bunzip2):' \ + -i CMakeLists.txt || die + + sed -e "/add_test/s:--build \${CMAKE_CURRENT_BINARY_DIR}:--build ${BUILD_DIR}:" \ + -i test/CMakeLists.txt || die + + sed -e "s:\${PROJECT_BINARY_DIR}/miopen/include:\${PROJECT_BINARY_DIR}/include:" \ + -i src/CMakeLists.txt || die + + sed -e "s:\${AMD_DEVICE_LIBS_PREFIX}/lib:${EPREFIX}/usr/lib/amdgcn/bitcode:" -i cmake/hip-config.cmake || die +} + +src_configure() { + if ! 
use debug; then + append-cflags "-DNDEBUG" + append-cxxflags "-DNDEBUG" + CMAKE_BUILD_TYPE="Release" + else + CMAKE_BUILD_TYPE="Debug" + fi + + local mycmakeargs=( + -DCMAKE_SKIP_RPATH=ON + -DAMDGPU_TARGETS="$(get_amdgpu_flags)" + -DCMAKE_INSTALL_PREFIX="${EPREFIX}/usr" + -DMIOPEN_BACKEND=HIP + -DBoost_USE_STATIC_LIBS=OFF + -DMIOPEN_USE_MLIR=OFF + -DBUILD_TESTS=$(usex test ON OFF) + -DBUILD_FILE_REORG_BACKWARD_COMPATIBILITY=OFF + -DROCM_SYMLINK_LIBS=OFF + ) + + if use test; then + mycmakeargs+=( + -DMIOPEN_TEST_ALL=ON + -DBUILD_TESTING=ON + -DMIOPEN_TEST_GDB=OFF + -DGOOGLETEST_DIR="${WORKDIR}/googletest-${GTEST_COMMIT}" + ) + for gpu_target in ${AMDGPU_TARGETS}; do + mycmakeargs+=(-DMIOPEN_TEST_${gpu_target^^}=ON ) + done + fi + + addpredict /dev/kfd + addpredict /dev/dri/ + append-cxxflags "--rocm-path=$(hipconfig -R)" + append-cxxflags "--hip-device-lib-path=${EPREFIX}/usr/lib/amdgcn/bitcode" + CXX="$(get_llvm_prefix ${LLVM_MAX_SLOT})/bin/clang++" cmake_src_configure +} + +src_test() { + check_amdgpu + LD_LIBRARY_PATH="${BUILD_DIR}"/lib cmake_src_test -j1 +} + +src_install() { + cmake_src_install +} diff --git a/sci-libs/pytorch/Manifest b/sci-libs/pytorch/Manifest index 448585fbef94..69d7de88a98a 100644 --- a/sci-libs/pytorch/Manifest +++ b/sci-libs/pytorch/Manifest @@ -8,10 +8,13 @@ AUX pytorch-2.0.0-emptyso.patch 522 BLAKE2B 8c0dd836c19bf64846bc2fc401b917494f01 AUX pytorch-2.0.0-global-dlopen.patch 834 BLAKE2B da3ddd29124ab345e50acd4b42cb310d75d7c9b13005ee41051a1a2b702958b81677fe4f7e76dcd678dd439c89114de2e568f4330380e6e621d2975957293921 SHA512 fa0156229d7cd20e0f3949913750ecf3d8c2f0e872c8dc272cb0144427e079938588c7d4d3fec087b6cfd7846ed6844fbaa3c5510e9be8890be926ba854f7833 AUX pytorch-2.1.1-dontbuildagain.patch 443 BLAKE2B 67cc940ae5be4d3a0a5ef6670a72de4f3419582b59bb7e905d4ee704c928c353f1ef03a446ac97f0718d24e5a88fc656c8e58e26f869c6eeef5d21b918308f36 SHA512 
4acf9dfca8e998a5ec1f6eebb7ffd5833f065dd995ef735c50ce34fb2553bbacc974c13eeaf07962d00de0f795fd4366a1866d1bb834ed93098217400db9dc6c AUX pytorch-2.1.1-emptyso.patch 527 BLAKE2B 104352b254b1f6a2e7b842265f9796d93c7fb23d4681961aad53ebcea8a7068a2ea9799d466555fa00db52db0136cb74d87853835e25f678c5ba641f8a76b8a0 SHA512 35aae74a0c95e9967909756af5c486e4c3555536016c2d4a37b9048e503c70f490cbf75c1d7814adcbbfeb3ba08a1e8ffac5507d8c964b28c0452f6eab03ca5b +AUX pytorch-2.2.1-emptyso.patch 464 BLAKE2B 8c93245f36218a63a910ddf41e1181c83306485ae788cc47ad191641dfed13b2f0ce8b492c9e377279a2ee8ed83c2aaac25e1ba8b4cad3d5a07c7bbc95d60063 SHA512 1f5cbd5937d20411a74812347a2a02111d80fcc73453717eb136d8e3ffc94f839234713b77c2c89b07f87404a132fd0a1d345666df8d6a3ba01284cc0b752fc2 DIST pytorch-1.13.1.tar.gz 108279745 BLAKE2B 75de03b74dfdaf8d8fb5ea743fcc0c1b0e408a714ad4160c487921220a7b1755e5fa6e587e6bbc8c9f34dd75e096d2e6dd69c80d24821835fff6c833314434d3 SHA512 f16f89d027efade11d057245cad5b69a390e88b458398310ae30de2dbff7c8fd7f1165be7b8da7ea989c81ac3f5a66c5cb9050610e441a97c83fb8aa28c0bd62 DIST pytorch-2.0.1.tar.gz 111335778 BLAKE2B 7a10cc2b2d5e2422aef7e060a0c3a62ca5c7460c6e0b9becade9b98939501975c74ed5a175a653731f43ca824d2c9bd31f41d1f633c2b139779ab23d5331e9ce SHA512 2309a22b3be3ccdb36d8d9781a59a7bdcc2fdb8d95ada205702ec77862480f0cbb12cd5d6b8cd3114d01a6e33b7743d0fe9de93debf37138ca5c14403cdb0c43 DIST pytorch-2.1.2.tar.gz 116316469 BLAKE2B c5a55ee264bc3477d3556ba6376b5591117e992e56e0dd0c9ba93d12526e2727f7840f6f1e0730a38223b6492c9556840c4ebf22ffd220e97225c2abff303747 SHA512 a8961d78ad785b13c959a0612563a60e0de17a7c8bb9822ddea9a24072796354d07e81c47b6cc8761b21a6448845b088cf80e1661d9e889b0ed5474d3dc76756 +DIST pytorch-2.2.1.tar.gz 116370903 BLAKE2B 7d08e80f91bad76fba1751c30a34bebfe7145058b7758c0d47112702263a80666f70687a8860744725c6aa995e854f766a5bfa4644c23e5635e7e08c8d63a6e9 SHA512 f19ebcf59d183c3348946ba7cfcab2bc4ca93785863b8edc39dba5772083a7b0425ccb4f92a8df4dc0d18246c75e8ff812993161467fbf9dc48d7fb28a1e26f1 EBUILD 
pytorch-1.13.1-r2.ebuild 1523 BLAKE2B 718a7d93fd6b59802e9f3e4033b7674f46197427214586f821939ca34f1aeb5c6a941aea35c4a0c7d6eb25c0d652d6bbbaf710d81bc1ffbc704665e4a65ec32e SHA512 aaa8f78c6e0156e7e2d9b23b0a2e3c21cbcead4540b59a3c75bc7da3968d7cdf7bbc4b6cf318aa647b039d915241a99977396c7f9a092a4d8ca51450d0979970 EBUILD pytorch-2.0.1-r1.ebuild 1651 BLAKE2B 634eb7914dd4cde80754ac182c44103ed9e2669538c8ea20b3975c7fe39a1bac22cc480b4003d23f950ff6edc667f26e6368c88a54f6f968fb709b0b4f9df908 SHA512 40db1eac8fc2fb0a3e7ed179fed59f19ce27f8888af3782074e3e41d2e8f51a9aa38babebc67a2add6e31b13df53d8b90e95599163c6d00406f6d21b8ebfe6b6 -EBUILD pytorch-2.1.2-r1.ebuild 1626 BLAKE2B e215762c8a7f7a9876dc4caa6050b96b6273e1d1aeb9b489d87a15806235003e80842285c08bb3ac77babb7b32c93cbcab6e9ec6411c54dc844adcb9f9c8da20 SHA512 a939390c5576cee3fee78f8851ceac76dccbd5c9c3ba72b89f0bf9494d201e1e92059f32d8b5a586ca3aa56b066045fe14122d9557b4ffc199bce3e81faaf90c +EBUILD pytorch-2.1.2-r1.ebuild 1616 BLAKE2B 5dc0bf21d5dde0c7f47afa6502ac550e84679f22a7030749e83e9ee145d96d4cc3a2779a063613674df43b6f186a6c09bdbead2bed10b564694913bb492717da SHA512 0936ee5f59dac44a9547be6bc3a20706f565016f7b6965de4d5b8f73c9e533b788851f8a5260a633ac0dec90d16f12f3e3212620336986ee10fe42645381f27c +EBUILD pytorch-2.2.1.ebuild 1608 BLAKE2B 83d8d7bf3fbfaf1c87d6fa500f66f574119d49e6c324b89259f53e61dd4fdad458e262790f097851a3be18aed7d621830be028f954de7d5a48e9a13481566dc8 SHA512 22bdcb0bbb600a86341b9b557de41921e29dc509c3f5940d8ec963b84d1d1c883e760b16297dd9e13998226e38427d736976eee54ceacf693398b22ccec49878 MISC metadata.xml 328 BLAKE2B f49c6d62ac88dfffe497ed05b1f727941a94acb87a809611dda8aedbd87508fdfdf94ec0e025cac2a649b0881b600236289943b4be0f6ca44fc105b6d8fbe97c SHA512 16c29d2c1177a89026235191ea3e33870c2e6b63f029b7febd4dbd2e99f598fab4b4fd2fa7a801ffd88393663a00752300e6e2cc847a83690b715bcc481d9dc2 diff --git a/sci-libs/pytorch/files/pytorch-2.2.1-emptyso.patch b/sci-libs/pytorch/files/pytorch-2.2.1-emptyso.patch new file mode 100644 index 
000000000000..2b4839baec5b --- /dev/null +++ b/sci-libs/pytorch/files/pytorch-2.2.1-emptyso.patch @@ -0,0 +1,13 @@ +--- a/setup.py 2024-03-08 19:34:42.148677983 +0100 ++++ b/setup.py 2024-03-08 19:34:52.402558423 +0100 +@@ -1019,10 +1019,6 @@ + extensions.append( + Extension(name="caffe2.python.caffe2_pybind11_state_hip", sources=[]), + ) +- if cmake_cache_vars["BUILD_FUNCTORCH"]: +- extensions.append( +- Extension(name="functorch._C", sources=[]), +- ) + + cmdclass = { + "bdist_wheel": wheel_concatenate, diff --git a/sci-libs/pytorch/pytorch-2.1.2-r1.ebuild b/sci-libs/pytorch/pytorch-2.1.2-r1.ebuild index e3bdd2b2f216..40a5e8ab3df1 100644 --- a/sci-libs/pytorch/pytorch-2.1.2-r1.ebuild +++ b/sci-libs/pytorch/pytorch-2.1.2-r1.ebuild @@ -4,10 +4,10 @@ EAPI=8 DISTUTILS_USE_PEP517=setuptools -PYTHON_COMPAT=( python3_{9..12} ) +PYTHON_COMPAT=( python3_{10..12} ) DISTUTILS_SINGLE_IMPL=1 DISTUTILS_EXT=1 -inherit distutils-r1 multibuild prefix +inherit distutils-r1 prefix DESCRIPTION="Tensors and Dynamic neural networks in Python" HOMEPAGE="https://pytorch.org/" diff --git a/sci-libs/pytorch/pytorch-2.2.1.ebuild b/sci-libs/pytorch/pytorch-2.2.1.ebuild new file mode 100644 index 000000000000..442d74f6a2d6 --- /dev/null +++ b/sci-libs/pytorch/pytorch-2.2.1.ebuild @@ -0,0 +1,66 @@ +# Copyright 2022-2024 Gentoo Authors +# Distributed under the terms of the GNU General Public License v2 + +EAPI=8 + +DISTUTILS_USE_PEP517=setuptools +PYTHON_COMPAT=( python3_{9..12} ) +DISTUTILS_SINGLE_IMPL=1 +DISTUTILS_EXT=1 +inherit distutils-r1 prefix + +DESCRIPTION="Tensors and Dynamic neural networks in Python" +HOMEPAGE="https://pytorch.org/" +SRC_URI="https://github.com/pytorch/${PN}/archive/refs/tags/v${PV}.tar.gz + -> ${P}.tar.gz" + +LICENSE="BSD" +SLOT="0" +KEYWORDS="~amd64" +RESTRICT="test" + +REQUIRED_USE=${PYTHON_REQUIRED_USE} +RDEPEND=" + ${PYTHON_DEPS} + ~sci-libs/caffe2-${PV}[${PYTHON_SINGLE_USEDEP}] + $(python_gen_cond_dep ' + dev-python/typing-extensions[${PYTHON_USEDEP}] + 
dev-python/sympy[${PYTHON_USEDEP}] + ') +" +DEPEND="${RDEPEND} + $(python_gen_cond_dep ' + dev-python/pyyaml[${PYTHON_USEDEP}] + ') +" + +src_prepare() { + eapply \ + "${FILESDIR}"/${PN}-2.1.1-dontbuildagain.patch \ + "${FILESDIR}"/pytorch-1.9.0-Change-library-directory-according-to-CMake-build.patch \ + "${FILESDIR}"/${PN}-2.0.0-global-dlopen.patch \ + "${FILESDIR}"/pytorch-1.7.1-torch_shm_manager.patch \ + "${FILESDIR}"/${PN}-1.13.0-setup.patch \ + "${FILESDIR}"/${P}-emptyso.patch \ + + # Set build dir for pytorch's setup + sed -i \ + -e "/BUILD_DIR/s|build|/var/lib/caffe2/|" \ + tools/setup_helpers/env.py \ + || die + distutils-r1_src_prepare + + hprefixify tools/setup_helpers/env.py +} + +python_compile() { + PYTORCH_BUILD_VERSION=${PV} \ + PYTORCH_BUILD_NUMBER=0 \ + USE_SYSTEM_LIBS=ON \ + CMAKE_BUILD_DIR="${BUILD_DIR}" \ + distutils-r1_python_compile develop sdist +} + +python_install() { + USE_SYSTEM_LIBS=ON distutils-r1_python_install +} diff --git a/sci-libs/rocBLAS/Manifest b/sci-libs/rocBLAS/Manifest index 035cc1079971..9d9423b200c5 100644 --- a/sci-libs/rocBLAS/Manifest +++ b/sci-libs/rocBLAS/Manifest @@ -14,5 +14,5 @@ DIST rocm-rocBLAS-5.4.2.tar.gz 15938434 BLAKE2B 14ebafa944fdac443800bb7f9b16f8ec DIST rocm-rocBLAS-5.7.1.tar.gz 55389700 BLAKE2B 9e58b1d29c8a04aa58ce17fdd9b0c2504934261851619dc5b696a04e8c6a79e7c44d86a6db7a9c76a434d76593de8c191cba6f163cf398d01cfc001e5fa32b5a SHA512 cfc73f6c490595a3c153c2ac2a3cb5926c48c2c19baa07ff3dcdf3a7ccaed82cfdff64ed5a3be64c349cb43d654d4b104d9090e2ec9f2061049d6a4921c07722 EBUILD rocBLAS-5.1.3-r4.ebuild 3050 BLAKE2B 79866dab3479b657dc21aabf7140d465c144b8ea778f1f620a6dad8425883b8a2feb6eff15e936074c5947d44be104e5c391732596c185cb360592981f4cf461 SHA512 b14cf2ec87859a56f314f161fdb6f96695e6f68838699406365f619e56481c99559e6a5e115f16376ea323b9cbf6b57307c2b8531d6bb31a6339639d1eb2d734 EBUILD rocBLAS-5.4.2-r3.ebuild 2689 BLAKE2B 
a3713d44891b79166ff1bcf5e2290853b85545a7cbe4a0605ace885f350d8de2ba119090c9bc9ac15bd768ebb7e58ae450cd31e58402f87e3ad0ca5344aeabc7 SHA512 95da8f3dd111894bfc18f522c44bd92e34e91a3215ef89e2bc24ffa63e6fd320d04ee2c98d9261482793e583064a7d77ed364a0ad0a72270009f302c6de28703 -EBUILD rocBLAS-5.7.1-r1.ebuild 2496 BLAKE2B 32fca1db75e79e9ce3f75e4876e2a4382c79bf4c1d6d23b2510008e6693f2b1185377f594bbbd2e8c9fe3aaef05f66f40c7dddeff1226651b433c46368751f18 SHA512 b85f820e3e6381f514ebedea194a685308f81cc9681312ba9c7fb78d1579893bf6c730c576f364567427b70dfc5324a202ca89bdff9fc84c2e43e30e76245f72 +EBUILD rocBLAS-5.7.1-r2.ebuild 2718 BLAKE2B 78b35d4032fd97fa5ff63273c213ecde3b7d7900b4002da27355ce1ab8f4af2a6e796560b955df6fb3b1d517752a153ccf4a9980e31733a141b880a11f676db7 SHA512 943f734ef9a089b5a16032614813b8e528de81a4b81c81734814ab4a986e3c01a3d9e6f0bdee32efd850f476f83380cb445ac3da2a6dc102cbd18414ff19a1db MISC metadata.xml 739 BLAKE2B 38ace9571ca717a5bf7251888b4537522326f34778be98cfe717a06dbbd3a04a2bf905ec1cabafe9bbd3fea90adfb5c25c8b41e46c034a7793363ea5fc0c7f29 SHA512 60f1d5aa2a6ebe17cfcf24780529e1eef521bb7c838e48d18180a52d580cf91430ac1f4fdd0e70b9804ec5f0b5957dbd431149f6e0b0b9033cf8ad683510c582 diff --git a/sci-libs/rocBLAS/rocBLAS-5.7.1-r1.ebuild b/sci-libs/rocBLAS/rocBLAS-5.7.1-r1.ebuild deleted file mode 100644 index ce542d4df039..000000000000 --- a/sci-libs/rocBLAS/rocBLAS-5.7.1-r1.ebuild +++ /dev/null @@ -1,107 +0,0 @@ -# Copyright 1999-2024 Gentoo Authors -# Distributed under the terms of the GNU General Public License v2 - -EAPI=8 - -DOCS_BUILDER="doxygen" -DOCS_DIR="docs" -DOCS_DEPEND="media-gfx/graphviz" -ROCM_VERSION=${PV} -inherit cmake docs edo multiprocessing rocm - -DESCRIPTION="AMD's library for BLAS on ROCm" -HOMEPAGE="https://github.com/ROCmSoftwarePlatform/rocBLAS" -SRC_URI="https://github.com/ROCmSoftwarePlatform/rocBLAS/archive/rocm-${PV}.tar.gz -> rocm-${P}.tar.gz" -S="${WORKDIR}/${PN}-rocm-${PV}" - -LICENSE="BSD" -KEYWORDS="~amd64" -SLOT="0/$(ver_cut 1-2)" -RESTRICT="!test? 
( test )" -IUSE="benchmark test" -REQUIRED_USE="${ROCM_REQUIRED_USE}" - -BDEPEND=" - >=dev-build/rocm-cmake-5.3 - dev-util/Tensile:${SLOT} - dev-python/joblib - test? ( dev-cpp/gtest ) -" - -DEPEND=" - >=dev-cpp/msgpack-cxx-6.0.0 - dev-util/hip - test? ( - virtual/blas - dev-cpp/gtest - sys-libs/libomp - ) - benchmark? ( - virtual/blas - sys-libs/libomp - ) -" - -QA_FLAGS_IGNORED="/usr/lib64/rocblas/library/.*" - -PATCHES=( - "${FILESDIR}"/${PN}-5.4.2-cpp_lib_filesystem.patch - "${FILESDIR}"/${PN}-5.4.2-add-missing-header.patch - "${FILESDIR}"/${PN}-5.4.2-link-cblas.patch - "${FILESDIR}"/${PN}-5.7.1-expand-isa-compatibility.patch - ) - -src_prepare() { - cmake_src_prepare - sed -e "s:,-rpath=.*\":\":" -i clients/CMakeLists.txt || die -} - -src_configure() { - addpredict /dev/random - addpredict /dev/kfd - addpredict /dev/dri/ - - local mycmakeargs=( - -DCMAKE_SKIP_RPATH=On - -DBUILD_FILE_REORG_BACKWARD_COMPATIBILITY=OFF - -DROCM_SYMLINK_LIBS=OFF - -DAMDGPU_TARGETS="$(get_amdgpu_flags)" - -DTensile_LOGIC="asm_full" - -DTensile_COMPILER="hipcc" - -DTensile_LIBRARY_FORMAT="msgpack" - -DTensile_CODE_OBJECT_VERSION="default" - -DTensile_ROOT="${EPREFIX}/usr/share/Tensile" - -DBUILD_WITH_TENSILE=ON - -DCMAKE_INSTALL_INCLUDEDIR="include/rocblas" - -DBUILD_CLIENTS_SAMPLES=OFF - -DBUILD_CLIENTS_TESTS=$(usex test ON OFF) - -DBUILD_CLIENTS_BENCHMARKS=$(usex benchmark ON OFF) - -DTensile_CPU_THREADS=$(makeopts_jobs) - -DBUILD_WITH_PIP=OFF - ) - - CXX=hipcc cmake_src_configure -} - -src_compile() { - docs_compile - cmake_src_compile -} - -src_test() { - check_amdgpu - cd "${BUILD_DIR}"/clients/staging || die - export ROCBLAS_TEST_TIMEOUT=3600 ROCBLAS_TENSILE_LIBPATH="${BUILD_DIR}/Tensile/library" - export LD_LIBRARY_PATH="${BUILD_DIR}/clients:${BUILD_DIR}/library/src" - edob ./${PN,,}-test -} - -src_install() { - cmake_src_install - - if use benchmark; then - cd "${BUILD_DIR}" || die - dolib.a clients/librocblas_fortran_client.a - dobin clients/staging/rocblas-bench - fi -} 
diff --git a/sci-libs/rocBLAS/rocBLAS-5.7.1-r2.ebuild b/sci-libs/rocBLAS/rocBLAS-5.7.1-r2.ebuild new file mode 100644 index 000000000000..ed39ae5207d8 --- /dev/null +++ b/sci-libs/rocBLAS/rocBLAS-5.7.1-r2.ebuild @@ -0,0 +1,111 @@ +# Copyright 1999-2024 Gentoo Authors +# Distributed under the terms of the GNU General Public License v2 + +EAPI=8 + +DOCS_BUILDER="doxygen" +DOCS_DIR="docs/.doxygen" +DOCS_DEPEND="media-gfx/graphviz" +ROCM_VERSION=${PV} +inherit cmake docs edo multiprocessing rocm + +DESCRIPTION="AMD's library for BLAS on ROCm" +HOMEPAGE="https://github.com/ROCmSoftwarePlatform/rocBLAS" +SRC_URI="https://github.com/ROCmSoftwarePlatform/rocBLAS/archive/rocm-${PV}.tar.gz -> rocm-${P}.tar.gz" +S="${WORKDIR}/${PN}-rocm-${PV}" + +LICENSE="BSD" +KEYWORDS="~amd64" +SLOT="0/$(ver_cut 1-2)" +RESTRICT="!test? ( test )" +IUSE="benchmark test" +REQUIRED_USE="${ROCM_REQUIRED_USE}" + +BDEPEND=" + >=dev-build/rocm-cmake-5.3 + dev-util/Tensile:${SLOT} + dev-python/joblib + test? ( dev-cpp/gtest ) +" + +DEPEND=" + >=dev-cpp/msgpack-cxx-6.0.0 + =dev-util/hip-5* + test? ( + virtual/blas + dev-cpp/gtest + sys-libs/libomp + ) + benchmark? 
( + virtual/blas + sys-libs/libomp + ) +" + +QA_FLAGS_IGNORED="/usr/lib64/rocblas/library/.*" + +PATCHES=( + "${FILESDIR}"/${PN}-5.4.2-cpp_lib_filesystem.patch + "${FILESDIR}"/${PN}-5.4.2-add-missing-header.patch + "${FILESDIR}"/${PN}-5.4.2-link-cblas.patch + "${FILESDIR}"/${PN}-5.7.1-expand-isa-compatibility.patch + ) + +src_prepare() { + cmake_src_prepare + sed -e "s:,-rpath=.*\":\":" -i clients/CMakeLists.txt || die +} + +src_configure() { + addpredict /dev/random + addpredict /dev/kfd + addpredict /dev/dri/ + + local mycmakeargs=( + -DCMAKE_SKIP_RPATH=On + -DBUILD_FILE_REORG_BACKWARD_COMPATIBILITY=OFF + -DROCM_SYMLINK_LIBS=OFF + -DAMDGPU_TARGETS="$(get_amdgpu_flags)" + -DTensile_LOGIC="asm_full" + -DTensile_COMPILER="hipcc" + -DTensile_LIBRARY_FORMAT="msgpack" + -DTensile_CODE_OBJECT_VERSION="default" + -DTensile_ROOT="${EPREFIX}/usr/share/Tensile" + -DBUILD_WITH_TENSILE=ON + -DCMAKE_INSTALL_INCLUDEDIR="include/rocblas" + -DBUILD_CLIENTS_SAMPLES=OFF + -DBUILD_CLIENTS_TESTS=$(usex test ON OFF) + -DBUILD_CLIENTS_BENCHMARKS=$(usex benchmark ON OFF) + -DTensile_CPU_THREADS=$(makeopts_jobs) + -DBUILD_WITH_PIP=OFF + ) + + CXX=hipcc cmake_src_configure +} + +src_compile() { + docs_compile + cmake_src_compile +} + +src_test() { + check_amdgpu + cd "${BUILD_DIR}"/clients/staging || die + export ROCBLAS_TEST_TIMEOUT=3600 ROCBLAS_TENSILE_LIBPATH="${BUILD_DIR}/Tensile/library" + export LD_LIBRARY_PATH="${BUILD_DIR}/clients:${BUILD_DIR}/library/src" + edob ./${PN,,}-test +} + +src_install() { + cmake_src_install + + if use benchmark; then + cd "${BUILD_DIR}" || die + dolib.a clients/librocblas_fortran_client.a + dobin clients/staging/rocblas-bench + fi + + # Stop llvm-strip from removing .strtab section from *.hsaco files, + # otherwise rocclr/elf/elf.cpp complains with "failed: null sections(STRTAB)" and crashes + dostrip -x /usr/$(get_libdir)/rocblas/library/ +} diff --git a/sci-libs/rocFFT/Manifest b/sci-libs/rocFFT/Manifest index a973e940fb99..25de558512b6 100644 --- 
a/sci-libs/rocFFT/Manifest +++ b/sci-libs/rocFFT/Manifest @@ -2,8 +2,9 @@ AUX rocFFT-4.2.0-add-functional-header.patch 426 BLAKE2B 82c289bb6663690163c1195 AUX rocFFT-5.0.2-add-math-header.patch 468 BLAKE2B 66cedbbc2b7aeaa2e42324e02325dc3f0c8869b109cdeb46892d597346532dcae2bd51739b85b97ecb6982935ec38f1332a4857d719b1c7c9e3a1e83f60623d1 SHA512 f714ba071838ebba8c6ab32b92eab52430622ef5f7eeea04f7a8c71b8a2a547688faf0f8d93a9be90183aebb976ec51722bcee81d960545046c82fd3df9b6f01 AUX rocFFT-5.1.3-add-stdexcept-header.patch 340 BLAKE2B b21454151dd2c41948e00b50360b3adbc93c08e7e849b11c032ba0b8dc76d1159883349065d7c87bf3098d0e67d4da48db220b58934de39fcb3c4d006a902a5e SHA512 a49f2bad5ea87f67a7d5dc7adb820a42ab2b92b0f1436a564c3906dcc4598de154d18247d8b4e8f3e12c3947da05890a4a9ba6f76852bebd1fcf26928ee48e0c AUX rocFFT-5.1.3-unbundle-sqlite.patch 2326 BLAKE2B 5a08f2ec7486f0687c4af46e09a01678e8255254bb78ae864436353d10017f86f1b3b557527a32771de43fa24148c3be5d233639bdf3e6746b2d120623bf934d SHA512 c545053741c55abf1dbcda89e9ece1bccb42a5db86c0329499b332299c5c7b73150650634cc7506bef2ee7e16a25bf1aacd5477bf13884e2cb01c48a113b33ab +AUX rocFFT-5.7.1-fix-rocm-link-path.patch 492 BLAKE2B daf4d865d8b2b5f3dc97a845f262c266588da910f517e31a10cf7896c6bc1c4fd794c01be39c7aa74d5eb80e4066b8d3dd8a8841210e0075ad4e593dc0f85f66 SHA512 b715c08515b6d289f1b045db7053ff08a27aa9cf940beacba47167a72f168d75f4652d98b33fc2bef303832f0689cf51d71f893e415ba4b4cd7a0f21c677de69 DIST rocFFT-5.1.3.tar.gz 845470 BLAKE2B 86060ce4f8612869fedc5bc92ed6a5bb7fbc539b26c7f5ab0ded5406ab6c340f64116782285d040824e54557af87d9ddb6f907fd1550b80c3da903d3c1d5e5c0 SHA512 175402856bd4753e74ce14326569be99221e21d0ce675e007900f6982c53e1566ad0b060eec545e0f17ff4dc4811688cc1003dc986226205ee95ee6f22d1ca43 DIST rocFFT-5.7.1.tar.gz 1192841 BLAKE2B 58f880f7ff866f85af70666d17e8b1a3ac8e63e853520546f746d74cecd805687fd689367a09d2b1ec4e1059b44b858377dce362cd4cae91cb85aad7c3185c00 SHA512 
48467589cb4bc1334a4074bacc6a726c850dc2a79ef82622301d61c4269fb81c0b2b9fce56d72bddca584d1797a8634d243a9a6b8cf943e687b2b0f5c931116f EBUILD rocFFT-5.1.3-r1.ebuild 3921 BLAKE2B 98eb96a54b1f73e0674299da7300c68ccee073a9a87a27213f865d8cc0b229078416b7fb4a98ade73ea13baa59d3e57c3117c457e59e710177287d726fe6f189 SHA512 a3feeec5c089171f6476bcfe5f1a8d154a1117be087f57151d4efc4403dcefb689dfee8241b7ad3651c8b008106314a7f5f8ae65a5147cc617beef1e63b1b803 -EBUILD rocFFT-5.7.1-r1.ebuild 3695 BLAKE2B a9d350012d221c762662ef87d36aad5d9a0754e96620558c868ac81f9af9ea92f6ea52b568dd4e20a3127bdf26d2b4a6d3ef976c0b103339d39a5dc93c0aa879 SHA512 c74a0555cbd1b0e8897b56b1ee29a31a8925b0cbbb0865e586347568a3f48e4f9fc8286fe8ef6999453bb913e1d4990a48630159fbfb32defb3010b962361e22 +EBUILD rocFFT-5.7.1-r2.ebuild 3945 BLAKE2B e663c5ef2aa609daec68644c943a4b4dbb7963f1506ef6dcd6c907c0b2bf9f5cddb768fb7e37503e5aba02c998655785bb44412205e2c5575fa95cf9b98a4dce SHA512 f49656ba5e5691bd83a3e18c22c343b254bafe3b8ea893b482e32421e6612cd4cb3cf14607751ffd78f8b0bfe56057ad563b624e27036a80b4db4b6e02f94fe9 MISC metadata.xml 676 BLAKE2B dbf1364ede458c08dde2ebd4a8ca7053a1dc00813f647fc09268ef5a97150945c19b2375f13cc74aec8ff0af831625221bfa675b5ded7769ff5e1a322ae8b5dc SHA512 352c744ece6202b976170615927c09b82513a73669cd5efc5ca626ac2b6929d18f8ecd0b8a91d87bef86aab17713158f1e7efaef0aee01b7ac6e7d6cd2a3a4f3 diff --git a/sci-libs/rocFFT/files/rocFFT-5.7.1-fix-rocm-link-path.patch b/sci-libs/rocFFT/files/rocFFT-5.7.1-fix-rocm-link-path.patch new file mode 100644 index 000000000000..03731516d8fb --- /dev/null +++ b/sci-libs/rocFFT/files/rocFFT-5.7.1-fix-rocm-link-path.patch @@ -0,0 +1,14 @@ +ld.lld with "-L/usr/lib -lhiprtc -ldl" fails in attempt to link +64-bit library to 32-bit /usr/lib/libdl.so.2 with: +"ld.lld: error: <...>/Scrt1.o is incompatible with elf32-i386" +--- a/library/src/CMakeLists.txt ++++ b/library/src/CMakeLists.txt +@@ -46,7 +46,7 @@ else() + if( WIN32 ) + set( ROCFFT_RTC_LINK_LIBS "${HIP_PATH}/lib/hiprtc.lib" ) + else() +- 
set( ROCFFT_RTC_LINK_LIBS -L${ROCM_PATH}/lib -lhiprtc -ldl ) ++ set( ROCFFT_RTC_LINK_LIBS -lhiprtc -ldl ) + endif() + endif() + diff --git a/sci-libs/rocFFT/rocFFT-5.7.1-r1.ebuild b/sci-libs/rocFFT/rocFFT-5.7.1-r1.ebuild deleted file mode 100644 index 0fba2a7508a3..000000000000 --- a/sci-libs/rocFFT/rocFFT-5.7.1-r1.ebuild +++ /dev/null @@ -1,148 +0,0 @@ -# Copyright 1999-2024 Gentoo Authors -# Distributed under the terms of the GNU General Public License v2 - -EAPI=8 - -PYTHON_COMPAT=( python3_{10..11} ) -ROCM_VERSION=${PV} - -inherit cmake check-reqs edo multiprocessing python-r1 rocm - -DESCRIPTION="Next generation FFT implementation for ROCm" -HOMEPAGE="https://github.com/ROCmSoftwarePlatform/rocFFT" -SRC_URI="https://github.com/ROCmSoftwarePlatform/rocFFT/archive/rocm-${PV}.tar.gz -> rocFFT-${PV}.tar.gz" - -LICENSE="MIT" -KEYWORDS="~amd64" -SLOT="0/$(ver_cut 1-2)" - -# RDEPEND: perfscripts? dev-python/plotly[${PYTHON_USEDEP}] # currently masked by arch/amd64/x32/package.mask -RDEPEND=" -perfscripts? ( - >=media-gfx/asymptote-2.61 - dev-texlive/texlive-latex - dev-tex/latexmk - sys-apps/texinfo - dev-python/sympy[${PYTHON_USEDEP}] - dev-python/numpy[${PYTHON_USEDEP}] - dev-python/scipy[${PYTHON_USEDEP}] - dev-python/pandas[${PYTHON_USEDEP}] ) -${PYTHON_DEPS}" - -DEPEND="dev-util/hip - ${PYTHON_DEPS}" - -BDEPEND=" - test? ( - dev-cpp/gtest - dev-libs/boost - >=sci-libs/fftw-3 - sys-libs/libomp - sci-libs/hipRAND:${SLOT}[${ROCM_USEDEP}] - ) - >=dev-build/cmake-3.22 - dev-build/rocm-cmake - dev-db/sqlite -" - -CHECKREQS_DISK_BUILD="7G" - -IUSE="benchmark perfscripts test" -REQUIRED_USE="perfscripts? ( benchmark ) ${PYTHON_REQUIRED_USE} ${ROCM_REQUIRED_USE}" - -RESTRICT="!test? 
( test )" - -S="${WORKDIR}/rocFFT-rocm-${PV}" - -PATCHES=() - -required_mem() { - if use test; then - echo "52G" - else - if [[ -n "${AMDGPU_TARGETS}" ]]; then - # count how many archs user specified in ${AMDGPU_TARGETS} - local NARCH=$(($(awk -F";" '{print NF-1}' <<< "${AMDGPU_TARGETS}" || die)+1)) - else - # The default number of AMDGPU_TARGETS for rocFFT-4.3.0. May change in the future. - local NARCH=7 - fi - echo "$(($(makeopts_jobs)*${NARCH}*25+2200))M" # A linear function estimating how much memory required - fi -} - -pkg_pretend() { - return # leave the disk space check to pkg_setup phase -} - -pkg_setup() { - export CHECKREQS_MEMORY=$(required_mem) - check-reqs_pkg_setup - python_setup -} - -src_prepare() { - sed -e "s/PREFIX rocfft//" \ - -e "/rocm_install_symlink_subdir/d" \ - -i library/src/CMakeLists.txt || die - - sed -e "/rocm_install_symlink_subdir/d" \ - -e "$!N;s:PREFIX\n[ ]*rocfft:# PREFIX rocfft\n:;P;D" \ - -i library/src/device/CMakeLists.txt || die - - if use perfscripts; then - pushd scripts/perf || die - sed -e "/\/opt\/rocm/d" -e "/rocmversion/s,rocm_info.strip(),\"${PV}\"," -i perflib/specs.py || dir - sed -e "/^top/,+1d" -i rocfft-perf suites.py || die - sed -e "s,perflib,${PN}_perflib,g" -i rocfft-perf suites.py perflib/*.py || die - sed -e "/^top = /s,__file__).*$,\"${EPREFIX}/usr/share/${PN}-perflib\")," \ - -i perflib/pdf.py perflib/generators.py || die - popd - fi - - cmake_src_prepare -} - -src_configure() { - addpredict /dev/kfd - addpredict /dev/dri/ - - local mycmakeargs=( - -DCMAKE_SKIP_RPATH=On - -DAMDGPU_TARGETS="$(get_amdgpu_flags)" - -Wno-dev - -DROCM_SYMLINK_LIBS=OFF - -DBUILD_CLIENTS_TESTS=$(usex test ON OFF) - -DBUILD_CLIENTS_RIDER=$(usex benchmark ON OFF) - -DSQLITE_USE_SYSTEM_PACKAGE=ON - -DBUILD_FILE_REORG_BACKWARD_COMPATIBILITY=OFF - ) - - CXX=hipcc cmake_src_configure -} - -src_test() { - check_amdgpu - cd "${BUILD_DIR}/clients/staging" || die - export 
LD_LIBRARY_PATH=${BUILD_DIR}/library/src/:${BUILD_DIR}/library/src/device - edob ./${PN,,}-test - edob ./${PN,,}-selftest -} - -src_install() { - cmake_src_install - - if use benchmark; then - cd "${BUILD_DIR}"/clients/staging || die - dobin *rider - fi - - if use perfscripts; then - cd "${S}"/scripts/perf || die - python_foreach_impl python_doexe rocfft-perf - python_moduleinto ${PN}_perflib - python_foreach_impl python_domodule perflib/*.py - insinto /usr/share/${PN}-perflib - doins *.asy suites.py - fi -} diff --git a/sci-libs/rocFFT/rocFFT-5.7.1-r2.ebuild b/sci-libs/rocFFT/rocFFT-5.7.1-r2.ebuild new file mode 100644 index 000000000000..f4ddb494bf35 --- /dev/null +++ b/sci-libs/rocFFT/rocFFT-5.7.1-r2.ebuild @@ -0,0 +1,160 @@ +# Copyright 1999-2024 Gentoo Authors +# Distributed under the terms of the GNU General Public License v2 + +EAPI=8 + +PYTHON_COMPAT=( python3_{10..12} ) +ROCM_VERSION=${PV} + +inherit cmake check-reqs edo multiprocessing python-r1 rocm + +DESCRIPTION="Next generation FFT implementation for ROCm" +HOMEPAGE="https://github.com/ROCmSoftwarePlatform/rocFFT" +SRC_URI="https://github.com/ROCmSoftwarePlatform/rocFFT/archive/rocm-${PV}.tar.gz -> rocFFT-${PV}.tar.gz" + +LICENSE="MIT" +KEYWORDS="~amd64" +SLOT="0/$(ver_cut 1-2)" + +# RDEPEND: perfscripts? dev-python/plotly[${PYTHON_USEDEP}] # currently masked by arch/amd64/x32/package.mask +RDEPEND=" +perfscripts? ( + >=media-gfx/asymptote-2.61 + dev-texlive/texlive-latex + dev-tex/latexmk + sys-apps/texinfo + dev-python/sympy[${PYTHON_USEDEP}] + dev-python/numpy[${PYTHON_USEDEP}] + dev-python/scipy[${PYTHON_USEDEP}] + dev-python/pandas[${PYTHON_USEDEP}] ) +${PYTHON_DEPS}" + +DEPEND="=dev-util/hip-5* + ${PYTHON_DEPS} + benchmark? ( + dev-libs/boost + sci-libs/hipRAND:${SLOT}[${ROCM_USEDEP}] + ) + test? 
( + dev-cpp/gtest + dev-libs/boost + >=sci-libs/fftw-3 + sys-libs/libomp + sci-libs/hipRAND:${SLOT}[${ROCM_USEDEP}] + ) +" + +BDEPEND=" + >=dev-build/cmake-3.22 + dev-build/rocm-cmake + dev-db/sqlite +" + +CHECKREQS_DISK_BUILD="7G" + +IUSE="benchmark perfscripts test" +REQUIRED_USE="perfscripts? ( benchmark ) ${PYTHON_REQUIRED_USE} ${ROCM_REQUIRED_USE}" + +RESTRICT="!test? ( test )" + +S="${WORKDIR}/rocFFT-rocm-${PV}" + +PATCHES=( + "${FILESDIR}"/${PN}-5.7.1-fix-rocm-link-path.patch +) + +required_mem() { + if use test; then + echo "52G" + else + if [[ -n "${AMDGPU_TARGETS}" ]]; then + # count how many archs user specified in ${AMDGPU_TARGETS} + local NARCH=$(($(awk -F";" '{print NF-1}' <<< "${AMDGPU_TARGETS}" || die)+1)) + else + # The default number of AMDGPU_TARGETS for rocFFT-4.3.0. May change in the future. + local NARCH=7 + fi + echo "$(($(makeopts_jobs)*${NARCH}*25+2200))M" # A linear function estimating how much memory required + fi +} + +pkg_pretend() { + return # leave the disk space check to pkg_setup phase +} + +pkg_setup() { + export CHECKREQS_MEMORY=$(required_mem) + check-reqs_pkg_setup + python_setup +} + +src_prepare() { + sed -e "s/PREFIX rocfft//" \ + -e "/rocm_install_symlink_subdir/d" \ + -i library/src/CMakeLists.txt || die + + sed -e "/rocm_install_symlink_subdir/d" \ + -e "\$!N;s:PREFIX\n[ ]*rocfft:# PREFIX rocfft\n:;P;D" \ + -i library/src/device/CMakeLists.txt || die + + if use perfscripts; then + pushd scripts/perf || die + sed -e "/\/opt\/rocm/d" -e "/rocmversion/s,rocm_info.strip(),\"${PV}\"," -i perflib/specs.py || die + sed -e "/^top/,+1d" -i rocfft-perf suites.py || die + sed -e "s,perflib,${PN}_perflib,g" -i rocfft-perf suites.py perflib/*.py || die + sed -e "/^top = /s,__file__).*$,\"${EPREFIX}/usr/share/${PN}-perflib\")," \ + -i perflib/pdf.py perflib/generators.py || die + popd + fi + + cmake_src_prepare +} + +src_configure() { + addpredict /dev/kfd + addpredict /dev/dri/ + + local mycmakeargs=( + -DCMAKE_SKIP_RPATH=On +
-DAMDGPU_TARGETS="$(get_amdgpu_flags)" + -Wno-dev + -DROCM_SYMLINK_LIBS=OFF + -DBUILD_CLIENTS_TESTS=$(usex test ON OFF) + -DBUILD_CLIENTS_RIDER=$(usex benchmark ON OFF) + -DSQLITE_USE_SYSTEM_PACKAGE=ON + -DBUILD_FILE_REORG_BACKWARD_COMPATIBILITY=OFF + ) + + CXX=hipcc cmake_src_configure +} + +src_test() { + check_amdgpu + cd "${BUILD_DIR}/clients/staging" || die + export LD_LIBRARY_PATH=${BUILD_DIR}/library/src/:${BUILD_DIR}/library/src/device + edob ./${PN,,}-test + edob ./${PN,,}-selftest +} + +src_install() { + cmake_src_install + + if use benchmark; then + cd "${BUILD_DIR}"/clients/staging || die + dobin *rider + + if ! use perfscripts; then + # prevent collision with dev-util/perf + rm -rf "${ED}"/usr/bin/perf || die + fi + fi + + if use perfscripts; then + cd "${S}"/scripts/perf || die + python_foreach_impl python_doexe rocfft-perf + python_moduleinto ${PN}_perflib + python_foreach_impl python_domodule perflib/*.py + insinto /usr/share/${PN}-perflib + doins *.asy suites.py + fi +} diff --git a/sci-libs/rocSPARSE/Manifest b/sci-libs/rocSPARSE/Manifest index 371966cebd71..a49bf66b16f3 100644 --- a/sci-libs/rocSPARSE/Manifest +++ b/sci-libs/rocSPARSE/Manifest @@ -28,5 +28,5 @@ DIST rocSPARSE_shipsec1.tar.gz 22175245 BLAKE2B f1d34482b8c3d3ee48033097d7d6f9b6 DIST rocSPARSE_sme3Dc.tar.gz 42483568 BLAKE2B f0d16b58f6ea28e67727637697a812a0b10d524c73aeb82f702411b9ebaea3670780e762ed0701aae80c6829e950233f74555523ef2d6c5740b007c7a65c03d4 SHA512 c5a0be2db919a45310a73ea1dd96d0d30168135f887a8c5c3d7ca11e2c3d12ea7d1331be20968d0135bd18da4ef2b043db68eeb7567b6f0f8daf782b87083c3a DIST rocSPARSE_webbase-1M.tar.gz 11360460 BLAKE2B 01b1d06adb5fb32fa402ed888d25a7e52a34779b078fc2e21c60e688d03579bb4ca8694f5ee4a824f7859952d55590a1fd9449dc80d6fe894d7b662b918882d4 SHA512 6401a965b8f6dd5392879956fde3c94f77fe1801a4a43428c03b6d639031575c647eab47b1babe53f4ecc2c3b8fc183e8339536b78dcd0d94ce62bfedd87c161 EBUILD rocSPARSE-5.1.3-r1.ebuild 4962 BLAKE2B 
75563a04a39be35abc1dc388026ccba17ed40c389f5c3d6b274035fe06f21cee93d4eb46abaf60e09723243768668ddae38d639b371eec65d0798296862dcbaa SHA512 792b4f541ccb30c32863889565de114a334088e29fadeeee54671a67aa6e69c57ad69054d44a8382bf56cae7af815feea5650f5bafe27348f97f8e9a8f831dd7 -EBUILD rocSPARSE-5.7.1-r1.ebuild 4905 BLAKE2B 4d8a6958a8de8956d9f2d2f90ffb4e044c35a2b5cd10e2a03d9529070762fe82649c32d89310a30447df108564651821ba63097e8a085100642adda3fc7dfaaa SHA512 696feb0e457ab356249a84b8840f5c21c99482d0450fac2276d0848931a692c6b70cbfa535c0ce4dba7484ad47c2a763eb389ec39abfe3b27ea836626cb48111 +EBUILD rocSPARSE-5.7.1-r2.ebuild 4919 BLAKE2B 68d80886d30d233fdbc601dd5ac0b08f1d3ccc00de1a9bccf79d49012706b9e88a053ee0b99a44d4b87ce8d1fa9f1b1c647d73c678948d61a328eb01b843a2bb SHA512 5007ca4f6b79122fae121ba290356d1f967752a545f89f4263a42cf74b567a4014d32f15761e6d49cb84c1c1d2c05138203cfc292d70ae769d465cadaa0f3f2c MISC metadata.xml 580 BLAKE2B be266a810a64b24ad0fdddf97e9ec46036c84e6dc0f7d00bc8e4447062fd78f14d0b73c4dccb542dade7db194b8d9034efd08f518aa913fa5e3ea88ae7972ada SHA512 1063ecc68c4634f2819dcab271fda7d12a32c44074e140503f09b0a80e7d599533f2b3a732215ed66a49b3e653e90c843e0b92c3bf0ec07c0ab749c90d7013ea diff --git a/sci-libs/rocSPARSE/rocSPARSE-5.7.1-r1.ebuild b/sci-libs/rocSPARSE/rocSPARSE-5.7.1-r1.ebuild deleted file mode 100644 index d9b2859cad9c..000000000000 --- a/sci-libs/rocSPARSE/rocSPARSE-5.7.1-r1.ebuild +++ /dev/null @@ -1,139 +0,0 @@ -# Copyright 1999-2024 Gentoo Authors -# Distributed under the terms of the GNU General Public License v2 - -EAPI=8 - -PYTHON_COMPAT=( python3_{10..12} ) -ROCM_VERSION=${PV} - -inherit cmake edo python-any-r1 toolchain-funcs rocm - -DESCRIPTION="Basic Linear Algebra Subroutines for sparse computation" -HOMEPAGE="https://github.com/ROCmSoftwarePlatform/rocSPARSE" - -SRC_URI="https://github.com/ROCmSoftwarePlatform/rocSPARSE/archive/rocm-${PV}.tar.gz -> rocSPARSE-${PV}.tar.gz -test? 
( -https://sparse.tamu.edu/MM/SNAP/amazon0312.tar.gz -> ${PN}_amazon0312.tar.gz -https://sparse.tamu.edu/MM/Muite/Chebyshev4.tar.gz -> ${PN}_Chebyshev4.tar.gz -https://sparse.tamu.edu/MM/FEMLAB/sme3Dc.tar.gz -> ${PN}_sme3Dc.tar.gz -https://sparse.tamu.edu/MM/Williams/webbase-1M.tar.gz -> ${PN}_webbase-1M.tar.gz -https://sparse.tamu.edu/MM/Bova/rma10.tar.gz -> ${PN}_rma10.tar.gz -https://sparse.tamu.edu/MM/JGD_BIBD/bibd_22_8.tar.gz -> ${PN}_bibd_22_8.tar.gz -https://sparse.tamu.edu/MM/Williams/mac_econ_fwd500.tar.gz -> ${PN}_mac_econ_fwd500.tar.gz -https://sparse.tamu.edu/MM/Williams/mc2depi.tar.gz -> ${PN}_mc2depi.tar.gz -https://sparse.tamu.edu/MM/Hamm/scircuit.tar.gz -> ${PN}_scircuit.tar.gz -https://sparse.tamu.edu/MM/Sandia/ASIC_320k.tar.gz -> ${PN}_ASIC_320k.tar.gz -https://sparse.tamu.edu/MM/GHS_psdef/bmwcra_1.tar.gz -> ${PN}_bmwcra_1.tar.gz -https://sparse.tamu.edu/MM/HB/nos1.tar.gz -> ${PN}_nos1.tar.gz -https://sparse.tamu.edu/MM/HB/nos2.tar.gz -> ${PN}_nos2.tar.gz -https://sparse.tamu.edu/MM/HB/nos3.tar.gz -> ${PN}_nos3.tar.gz -https://sparse.tamu.edu/MM/HB/nos4.tar.gz -> ${PN}_nos4.tar.gz -https://sparse.tamu.edu/MM/HB/nos5.tar.gz -> ${PN}_nos5.tar.gz -https://sparse.tamu.edu/MM/HB/nos6.tar.gz -> ${PN}_nos6.tar.gz -https://sparse.tamu.edu/MM/HB/nos7.tar.gz -> ${PN}_nos7.tar.gz -https://sparse.tamu.edu/MM/DNVS/shipsec1.tar.gz -> ${PN}_shipsec1.tar.gz -https://sparse.tamu.edu/MM/Cote/mplate.tar.gz -> ${PN}_mplate.tar.gz -https://sparse.tamu.edu/MM/Bai/qc2534.tar.gz -> ${PN}_qc2534.tar.gz -https://sparse.tamu.edu/MM/Chevron/Chevron2.tar.gz -> ${PN}_Chevron2.tar.gz -https://sparse.tamu.edu/MM/Chevron/Chevron3.tar.gz -> ${PN}_Chevron3.tar.gz -https://sparse.tamu.edu/MM/Chevron/Chevron4.tar.gz -> ${PN}_Chevron4.tar.gz -)" - -LICENSE="MIT" -KEYWORDS="~amd64" -IUSE="benchmark test" -REQUIRED_USE="${ROCM_REQUIRED_USE}" -SLOT="0/$(ver_cut 1-2)" - -RDEPEND="dev-util/hip - sci-libs/rocPRIM:${SLOT}" -DEPEND="${RDEPEND}" -BDEPEND="test? 
( - dev-cpp/gtest - >=dev-build/cmake-3.22 - $(python_gen_any_dep 'dev-python/pyyaml[${PYTHON_USEDEP}]') -) -benchmark? ( app-admin/chrpath ) -" - -RESTRICT="!test? ( test )" - -S="${WORKDIR}/rocSPARSE-rocm-${PV}" - -PATCHES=( -) - -python_check_deps() { - if use test; then - python_has_version "dev-python/pyyaml[${PYTHON_USEDEP}]" - fi -} - -src_prepare() { - sed -e "s/PREFIX rocsparse//" \ - -e "/ + + + + sci@gentoo.org + Gentoo Science Project + + + lockalsash@gmail.com + Sv. Lockal + + + ROCm/rocWMMA + + diff --git a/sci-libs/rocWMMA/rocWMMA-5.7.1.ebuild b/sci-libs/rocWMMA/rocWMMA-5.7.1.ebuild new file mode 100644 index 000000000000..a2ca2b3404da --- /dev/null +++ b/sci-libs/rocWMMA/rocWMMA-5.7.1.ebuild @@ -0,0 +1,56 @@ +# Copyright 1999-2024 Gentoo Authors +# Distributed under the terms of the GNU General Public License v2 + +EAPI=8 +ROCM_SKIP_GLOBALS=1 + +inherit cmake rocm + +DESCRIPTION="library for accelerating mixed precision matrix multiply-accumulate operations" +HOMEPAGE="https://github.com/ROCm/rocWMMA" +SRC_URI="https://github.com/ROCm/rocWMMA/archive/rocm-${PV}.tar.gz -> rocWMMA-${PV}.tar.gz" + +LICENSE="MIT" +KEYWORDS="~amd64" +SLOT="0/$(ver_cut 1-2)" + +DEPEND="=dev-util/hip-5*" + +BDEPEND=" + test? ( + dev-cpp/gtest + ) + dev-build/rocm-cmake +" + +IUSE_TARGETS=( gfx908 gfx90a gfx1100 gfx1101 gfx1102 ) +IUSE_TARGETS=( "${IUSE_TARGETS[@]/#/amdgpu_targets_}" ) + +IUSE="${IUSE_TARGETS[@]/#/+} test" + +REQUIRED_USE="|| ( ${IUSE_TARGETS[*]} )" + +RESTRICT="!test? 
( test )" + +S="${WORKDIR}/rocWMMA-rocm-${PV}" + +PATCHES=( + "${FILESDIR}"/${PN}-5.7.1-use-system-googletest.patch +) + +src_configure() { + local mycmakeargs=( + -DCMAKE_SKIP_RPATH=ON + -DAMDGPU_TARGETS="$(get_amdgpu_flags)" + -DROCM_SYMLINK_LIBS=OFF + -DROCWMMA_BUILD_SAMPLES=OFF + -DROCWMMA_BUILD_TESTS=$(usex test ON OFF) + ) + use test && mycmakeargs+=(-DROCWMMA_USE_SYSTEM_GOOGLETEST=ON) + CC=hipcc CXX=hipcc cmake_src_configure +} + +src_test() { + check_amdgpu + cmake_src_test -j1 +} -- cgit v1.2.3