diff options
Diffstat (limited to 'dev-cpp/xsimd')
-rw-r--r-- | dev-cpp/xsimd/Manifest | 4 | ||||
-rw-r--r-- | dev-cpp/xsimd/files/xsimd-13.0.0-detection-simd-with-mitigations.patch | 148 | ||||
-rw-r--r-- | dev-cpp/xsimd/files/xsimd-13.0.0-sve-rvv.patch | 88 | ||||
-rw-r--r-- | dev-cpp/xsimd/xsimd-13.0.0.ebuild | 60 |
4 files changed, 300 insertions, 0 deletions
diff --git a/dev-cpp/xsimd/Manifest b/dev-cpp/xsimd/Manifest index 21d48dbecda6..f69c57ee861c 100644 --- a/dev-cpp/xsimd/Manifest +++ b/dev-cpp/xsimd/Manifest @@ -1,8 +1,12 @@ AUX xsimd-11.1.0-c++17.patch 1429 BLAKE2B 23be29fdd74ed37632bd1fba0ffdedf08af48f14985494f5befac4afb34622bb44e294e954d9d4e57a07ee6e9d988a48a93dd1b66a6f6206b79bd7c0cb600cc8 SHA512 41a474a6d768aec39e6760d4ff60017a5e3d5c4349bf244d3de1464bd9b06022d6cadcd4f91cc17a6630f38f1b0fe0e3aa4fa5a6810b8c57d649f5a8da28c217 AUX xsimd-11.1.0-no-march.patch 3720 BLAKE2B 21adb3c35d8161db849c90363781642fa9c1051686e6dba00e55396adc905d1276540c663d34ab7bd124daa3fe6e367c5c41c17d7e0f31d55b82b1488fbdee0f SHA512 567f535a2a0dcdb02f05b53beba48e9704ef40d63df9531014257e0773a8df09854245ec5d22e6b7cffdf7541e8f48e55e2ea8faa513ca93f952aa177757bb3a AUX xsimd-12.1.1-no-march.patch 3739 BLAKE2B 44fb2ddfb55b8109e3b92a1479647ecbaf0f16fe949bc70c2f12fcfbd1cb0d87bfdf43d0fc2ac835bde8dbe843527d6426692a2fd0b504b6cfdcee824ae5d262 SHA512 82d5d1834af23f94ef0421c328e8f0b84c4c3b05acbd3c05bc9ea9c56c4078e827b8dcecf5f726deb35de9b3693c63effd5b7085c07a3b964f25a577d53c6da2 +AUX xsimd-13.0.0-detection-simd-with-mitigations.patch 5810 BLAKE2B a9a2bf8211e60c4e370c6c845cb06a1b17c7666778e907ba3215c9309a82e70efba2462cfb77298f15dd2366b0354c5830c99a3d08cfd619981b5e9c5179726b SHA512 b01b33722aa0a45f74d4df9810ea442ec37932aeb188c5c40e63f15533667b82b0ad5a0c2c737e18ad327e70c44bf13669f46dcc57fcde305dd94f0d24421e9f +AUX xsimd-13.0.0-sve-rvv.patch 3244 BLAKE2B 50681c648f98caf5efa5c916618be090435f1e35ecc1877e4c933e31d2e869c25cc36b6c8d885ec0749753b07b903125b17dbfd55e95e5f5e1a3e01363942463 SHA512 36f4dfee1103155b1dc9159293690df937fd554d0ce401834193e59283f65beb366f9a48f5fc25bb367a11c0c2b7a57b7e225d1a9cd74de9f1a143e919c0b052 DIST xsimd-11.1.0.tar.gz 219350 BLAKE2B 2bbbc4f7dbe489a407fe798b146c008aba44664efc943c2e1507d5c6e7da2a03ed91abf0d872e5abf9bd94be3e76bef276ec5b47d4e356b42a7b4c680bd6f3d9 SHA512 
3a6141dfa4d95a977f4222880dfd06197613d153a78a84653022423279eec037ea9def08ae225aba7231c0b2c434ab7c907c965f8367fb0db9b96113980b51f3 DIST xsimd-12.1.1.tar.gz 253141 BLAKE2B 29efbb045d8ade8737d702a73f3d0a912111dd4fbc84485c0e54c8b06d73edbb4b85f4b51e24da9bed0dea010b0cce9d99b57e20e8b94d3daf90d46031548eb9 SHA512 8e45a8e9b28358d5f20f713ea19a8c366edc62790c27984149f283dfe808d78a549c8ec465e8b3677d7e30b2cb80093908de364bbb9dc80683f5fdfb843131e1 +DIST xsimd-13.0.0.tar.gz 259967 BLAKE2B 63267c75a7e8ed2e8689a912e79a1d012ff3cf3813969f97a640801e372ec9e66a6e3d3034b6918a30dc3374e8211fd1107f1309156354b6bca6ba17c6f132b4 SHA512 cdc42ddad3353297cf25ea2b6b3f09967f5f388efc26241f2997979fdbbac072819ff771145bc5bfa86cb326cca84b4119e8e6e3f658407961cf203a40603a7f EBUILD xsimd-11.1.0-r1.ebuild 1034 BLAKE2B fdc6a900c039da14a5091ee7ed4a9ea9e2611610a9bd812fc6be91544e08e2f91d101b0800e70c8df224bab2d08677723e9aece300edbca585d9e31ee9425c4a SHA512 0e00b0d9143cd2c2736be80f35616fbb17789bc9696efb9ee340d13243b2c1c299314549faeeb6e02b3029e96709d8e7c108fe2b58cd5658aa60a8e9ff44c1e1 EBUILD xsimd-12.1.1.ebuild 1045 BLAKE2B 263ebcc57be8996455820190be8d339f4f22ceb17d4da6e8f1438502307b429b091cf721d2ea9e55e7a2a6c66e65d1d824a271af3f5aeec0d66aa519c2e345c7 SHA512 998d903d3841879ce50c7e812b74d04bea52f49eb02222d886d4322f4cd7dd386c6eecf95f39b26668c93d91ac899c2c336ba6a677d3bd354e07c5aff52d8ed3 +EBUILD xsimd-13.0.0.ebuild 1161 BLAKE2B fecc69fcb7fb158a22d5bf98f3b4f0e7ea7a69364daf25371f2fe9979fd0b8b2a6bb1d1fcbbe6dd54d4c5d0cd1877cf2eb9f56d357e98d722ced29136b701732 SHA512 536dcb2b6373a3463142d03cca4544856d529106bd049aca0e720c14a09cf4457a4d9a42de0fe3bbd859b7934cb180469191f513e5013155bc100944702f018e MISC metadata.xml 385 BLAKE2B e22c7622c338c277eee7dd048463a2d3244fffcd255b1a9dd9652a1295c3c1f0043e67fab28b084b3f10b03b5fd5e36b4f713c040ddc168ac6be9287af14e031 SHA512 5826eb1e62fa79c1e355668ad83a9e4e113b3cf154535537181597062f952ad436c56c8d9dd9f3599815307e33f4e1366b2a9b109d27c53bc112fe0a4c62d885 diff --git 
a/dev-cpp/xsimd/files/xsimd-13.0.0-detection-simd-with-mitigations.patch b/dev-cpp/xsimd/files/xsimd-13.0.0-detection-simd-with-mitigations.patch new file mode 100644 index 000000000000..6aab22cd8416 --- /dev/null +++ b/dev-cpp/xsimd/files/xsimd-13.0.0-detection-simd-with-mitigations.patch @@ -0,0 +1,148 @@ +https://mail.kde.org/pipermail/distributions/2024-July/001511.html +https://github.com/xtensor-stack/xsimd/commit/96edf0340492fa9c080f5182b38358ca85baef5e + +From 96edf0340492fa9c080f5182b38358ca85baef5e Mon Sep 17 00:00:00 2001 +From: Dmitry Kazakov <dimula73@gmail.com> +Date: Tue, 28 May 2024 22:21:08 +0200 +Subject: [PATCH] Fix detection of SSE/AVX/AVX512 when they are explicitly + disabled by OS + +Some CPU vulnerability mitigations may disable AVX functionality +on the hardware level via the XCR0 register. We should check that +manually to verify that OS actually allows us to use this feature. + +See https://bugs.kde.org/show_bug.cgi?id=484622 + +Fix #1025 +--- + include/xsimd/config/xsimd_cpuid.hpp | 91 ++++++++++++++++++++++------ + 1 file changed, 72 insertions(+), 19 deletions(-) + +diff --git a/include/xsimd/config/xsimd_cpuid.hpp b/include/xsimd/config/xsimd_cpuid.hpp +index f22089bac..6dda3be09 100644 +--- a/include/xsimd/config/xsimd_cpuid.hpp ++++ b/include/xsimd/config/xsimd_cpuid.hpp +@@ -114,6 +114,35 @@ namespace xsimd + #endif + + #elif defined(__x86_64__) || defined(__i386__) || defined(_M_AMD64) || defined(_M_IX86) ++ ++ auto get_xcr0_low = []() noexcept ++ { ++ uint32_t xcr0; ++ ++#if defined(_MSC_VER) && _MSC_VER >= 1400 ++ ++ xcr0 = (uint32_t)_xgetbv(0); ++ ++#elif defined(__GNUC__) ++ ++ __asm__( ++ "xorl %%ecx, %%ecx\n" ++ "xgetbv\n" ++ : "=a"(xcr0) ++ : ++#if defined(__i386__) ++ : "ecx", "edx" ++#else ++ : "rcx", "rdx" ++#endif ++ ); ++ ++#else /* _MSC_VER < 1400 */ ++#error "_MSC_VER < 1400 is not supported" ++#endif /* _MSC_VER && _MSC_VER >= 1400 */ ++ return xcr0; ++ }; ++ + auto get_cpuid = [](int reg[4], int level, int count = 
0) noexcept + { + +@@ -148,19 +177,43 @@ namespace xsimd + + get_cpuid(regs1, 0x1); + +- sse2 = regs1[3] >> 26 & 1; +- sse3 = regs1[2] >> 0 & 1; +- ssse3 = regs1[2] >> 9 & 1; +- sse4_1 = regs1[2] >> 19 & 1; +- sse4_2 = regs1[2] >> 20 & 1; +- fma3_sse42 = regs1[2] >> 12 & 1; ++ // OS can explicitly disable the usage of SSE/AVX extensions ++ // by clearing the corresponding state-enable bits in the XCR0 register ++ // ++ // https://docs.kernel.org/admin-guide/hw-vuln/gather_data_sampling.html ++ ++ unsigned sse_state_os_enabled = 1; ++ unsigned avx_state_os_enabled = 1; ++ unsigned avx512_state_os_enabled = 1; ++ ++ // OSXSAVE: A value of 1 indicates that the OS has set CR4.OSXSAVE[bit ++ // 18] to enable XSETBV/XGETBV instructions to access XCR0 and ++ // to support processor extended state management using ++ // XSAVE/XRSTOR. ++ bool osxsave = regs1[2] >> 27 & 1; ++ if (osxsave) ++ { ++ ++ uint32_t xcr0 = get_xcr0_low(); ++ ++ sse_state_os_enabled = xcr0 >> 1 & 1; ++ avx_state_os_enabled = xcr0 >> 2 & sse_state_os_enabled; ++ avx512_state_os_enabled = xcr0 >> 6 & avx_state_os_enabled; ++ } ++ ++ sse2 = regs1[3] >> 26 & sse_state_os_enabled; ++ sse3 = regs1[2] >> 0 & sse_state_os_enabled; ++ ssse3 = regs1[2] >> 9 & sse_state_os_enabled; ++ sse4_1 = regs1[2] >> 19 & sse_state_os_enabled; ++ sse4_2 = regs1[2] >> 20 & sse_state_os_enabled; ++ fma3_sse42 = regs1[2] >> 12 & sse_state_os_enabled; + +- avx = regs1[2] >> 28 & 1; ++ avx = regs1[2] >> 28 & avx_state_os_enabled; + fma3_avx = avx && fma3_sse42; + + int regs8[4]; + get_cpuid(regs8, 0x80000001); +- fma4 = regs8[2] >> 16 & 1; ++ fma4 = regs8[2] >> 16 & avx_state_os_enabled; + + // sse4a = regs[2] >> 6 & 1; + +@@ -168,23 +221,23 @@ namespace xsimd + + int regs7[4]; + get_cpuid(regs7, 0x7); +- avx2 = regs7[1] >> 5 & 1; ++ avx2 = regs7[1] >> 5 & avx_state_os_enabled; + + int regs7a[4]; + get_cpuid(regs7a, 0x7, 0x1); +- avxvnni = regs7a[0] >> 4 & 1; ++ avxvnni = regs7a[0] >> 4 & avx_state_os_enabled; + + fma3_avx2 = avx2 && fma3_sse42; + 
+- avx512f = regs7[1] >> 16 & 1; +- avx512cd = regs7[1] >> 28 & 1; +- avx512dq = regs7[1] >> 17 & 1; +- avx512bw = regs7[1] >> 30 & 1; +- avx512er = regs7[1] >> 27 & 1; +- avx512pf = regs7[1] >> 26 & 1; +- avx512ifma = regs7[1] >> 21 & 1; +- avx512vbmi = regs7[2] >> 1 & 1; +- avx512vnni_bw = regs7[2] >> 11 & 1; ++ avx512f = regs7[1] >> 16 & avx512_state_os_enabled; ++ avx512cd = regs7[1] >> 28 & avx512_state_os_enabled; ++ avx512dq = regs7[1] >> 17 & avx512_state_os_enabled; ++ avx512bw = regs7[1] >> 30 & avx512_state_os_enabled; ++ avx512er = regs7[1] >> 27 & avx512_state_os_enabled; ++ avx512pf = regs7[1] >> 26 & avx512_state_os_enabled; ++ avx512ifma = regs7[1] >> 21 & avx512_state_os_enabled; ++ avx512vbmi = regs7[2] >> 1 & avx512_state_os_enabled; ++ avx512vnni_bw = regs7[2] >> 11 & avx512_state_os_enabled; + avx512vnni_vbmi = avx512vbmi && avx512vnni_bw; + #endif + } + diff --git a/dev-cpp/xsimd/files/xsimd-13.0.0-sve-rvv.patch b/dev-cpp/xsimd/files/xsimd-13.0.0-sve-rvv.patch new file mode 100644 index 000000000000..38ea56d1baa2 --- /dev/null +++ b/dev-cpp/xsimd/files/xsimd-13.0.0-sve-rvv.patch @@ -0,0 +1,88 @@ +https://mail.kde.org/pipermail/distributions/2024-July/001511.html +https://github.com/xtensor-stack/xsimd/commit/80a59235e3ffa51659aaa06f002bfd088b77023c + +From 80a59235e3ffa51659aaa06f002bfd088b77023c Mon Sep 17 00:00:00 2001 +From: Dmitry Kazakov <dimula73@gmail.com> +Date: Fri, 14 Jun 2024 10:19:55 +0200 +Subject: [PATCH] Fix xsimd::available_architectures().has() for sve and rvv + archs + +Ideally the patch CPU detection code should also check if the length +of SVE and RVV is actually supported by the current CPU implementation +(i.e. ZCR_Elx.LEN register for SVE and something else for RVV), but +I don't have such CPUs/emulators handy, so I cannot add such checks. + +Given that xsimd::available_architectures().has() is a new feature +of XSIMD13 and the length check has never been present in XSIMD, this +bug is not a regression at least. 
+ +The patch also adds a unittest that reproduces the error the patch fixes +--- + include/xsimd/config/xsimd_cpuid.hpp | 12 ++++++++++-- + test/test_arch.cpp | 15 +++++++++++++++ + 2 files changed, 25 insertions(+), 2 deletions(-) + +diff --git a/include/xsimd/config/xsimd_cpuid.hpp b/include/xsimd/config/xsimd_cpuid.hpp +index 6dda3be09..8021fceb8 100644 +--- a/include/xsimd/config/xsimd_cpuid.hpp ++++ b/include/xsimd/config/xsimd_cpuid.hpp +@@ -42,6 +42,10 @@ namespace xsimd + #define ARCH_FIELD_EX(arch, field_name) \ + unsigned field_name; \ + XSIMD_INLINE bool has(::xsimd::arch) const { return this->field_name; } ++ ++#define ARCH_FIELD_EX_REUSE(arch, field_name) \ ++ XSIMD_INLINE bool has(::xsimd::arch) const { return this->field_name; } ++ + #define ARCH_FIELD(name) ARCH_FIELD_EX(name, name) + + ARCH_FIELD(sse2) +@@ -72,8 +76,12 @@ namespace xsimd + ARCH_FIELD(neon) + ARCH_FIELD(neon64) + ARCH_FIELD_EX(i8mm<::xsimd::neon64>, i8mm_neon64) +- ARCH_FIELD(sve) +- ARCH_FIELD(rvv) ++ ARCH_FIELD_EX(detail::sve<512>, sve) ++ ARCH_FIELD_EX_REUSE(detail::sve<256>, sve) ++ ARCH_FIELD_EX_REUSE(detail::sve<128>, sve) ++ ARCH_FIELD_EX(detail::rvv<512>, rvv) ++ ARCH_FIELD_EX_REUSE(detail::rvv<256>, rvv) ++ ARCH_FIELD_EX_REUSE(detail::rvv<128>, rvv) + ARCH_FIELD(wasm) + + #undef ARCH_FIELD +diff --git a/test/test_arch.cpp b/test/test_arch.cpp +index b42073358..f1f50d546 100644 +--- a/test/test_arch.cpp ++++ b/test/test_arch.cpp +@@ -38,6 +38,16 @@ struct check_supported + } + }; + ++struct check_cpu_has_intruction_set ++{ ++ template <class Arch> ++ void operator()(Arch arch) const ++ { ++ static_assert(std::is_same<decltype(xsimd::available_architectures().has(arch)), bool>::value, ++ "cannot test instruction set availability on CPU"); ++ } ++}; ++ + struct check_available + { + template <class Arch> +@@ -71,6 +81,11 @@ TEST_CASE("[multi arch support]") + xsimd::supported_architectures::for_each(check_supported {}); + } + ++ SUBCASE("xsimd::available_architectures::has") 
++ { ++ xsimd::all_architectures::for_each(check_cpu_has_intruction_set {}); ++ } ++ + SUBCASE("xsimd::default_arch::name") + { + constexpr char const* name = xsimd::default_arch::name(); + diff --git a/dev-cpp/xsimd/xsimd-13.0.0.ebuild b/dev-cpp/xsimd/xsimd-13.0.0.ebuild new file mode 100644 index 000000000000..41414ef626b6 --- /dev/null +++ b/dev-cpp/xsimd/xsimd-13.0.0.ebuild @@ -0,0 +1,60 @@ +# Copyright 2023-2024 Gentoo Authors +# Distributed under the terms of the GNU General Public License v2 + +EAPI=8 + +inherit cmake + +DESCRIPTION="C++ wrappers for SIMD intrinsics" +HOMEPAGE="https://github.com/xtensor-stack/xsimd" +SRC_URI="https://github.com/xtensor-stack/${PN}/archive/refs/tags/${PV}.tar.gz + -> ${P}.tar.gz" + +LICENSE="BSD" +SLOT="0" +KEYWORDS="~amd64 ~arm ~arm64 ~hppa ~loong ~ppc ~ppc64 ~riscv ~s390 ~sparc ~x86" +IUSE="doc test" +RESTRICT="!test? ( test )" + +BDEPEND=" + doc? ( + app-text/doxygen + dev-python/breathe + dev-python/sphinx + dev-python/sphinx-rtd-theme + ) + test? ( dev-cpp/doctest )" + +PATCHES=( + "${FILESDIR}"/${PN}-11.1.0-c++17.patch + "${FILESDIR}"/${PN}-12.1.1-no-march.patch + "${FILESDIR}"/${PN}-13.0.0-sve-rvv.patch + "${FILESDIR}"/${PN}-13.0.0-detection-simd-with-mitigations.patch +) + +src_prepare() { + sed -i \ + -e '/fPIC/d' \ + test/CMakeLists.txt \ + || die + cmake_src_prepare +} + +src_configure() { + local mycmakeargs=( + -DBUILD_TESTS=$(usex test) + ) + cmake_src_configure +} + +src_compile() { + cmake_src_compile + use doc && emake -C docs html +} + +src_install() { + cmake_src_install + if use doc; then + dodoc -r docs/build/html + fi +} |