From 9967860c0cd26c0940ee4b9ab6698e00e5b1260a Mon Sep 17 00:00:00 2001 From: V3n3RiX Date: Mon, 28 Aug 2023 10:02:11 +0100 Subject: gentoo auto-resync : 28:08:2023 - 10:02:11 --- media-libs/Manifest.gz | Bin 68866 -> 68871 bytes media-libs/opencv/Manifest | 3 +- .../opencv/files/opencv-4.8.0-arm64-fp16.patch | 272 +++++++++++++++++++++ media-libs/opencv/opencv-4.8.0-r1.ebuild | 1 + 4 files changed, 275 insertions(+), 1 deletion(-) create mode 100644 media-libs/opencv/files/opencv-4.8.0-arm64-fp16.patch (limited to 'media-libs') diff --git a/media-libs/Manifest.gz b/media-libs/Manifest.gz index ae7d1322efaf..78e20c77407a 100644 Binary files a/media-libs/Manifest.gz and b/media-libs/Manifest.gz differ diff --git a/media-libs/opencv/Manifest b/media-libs/opencv/Manifest index eb6f92a9b88d..83626ac835bf 100644 --- a/media-libs/opencv/Manifest +++ b/media-libs/opencv/Manifest @@ -5,6 +5,7 @@ AUX opencv-4.4.0-disable-native-cpuflag-detect.patch 1218 BLAKE2B 06569f6b7d33ec AUX opencv-4.5.0-link-with-cblas-for-lapack.patch 726 BLAKE2B adbffd961c88229bf810e13fe20705352ff506a653c1d55bb0687c03d8daf9e05aed0f2ba9cd6e306c625837d6ddf64eeb99dc6cbf2b15fae833f4a7eea6aad0 SHA512 4c3108e304721c2cd78eb82f5d7bccc18831db5f47bc628a98ae6a37da389deef30e7e9b6dc5644e3bc0e0fafdd907dc37822a58a25555999698527d2ac364b0 AUX opencv-4.6.0-fix-build-examples.patch 700 BLAKE2B 4560255001cde7e8071d2ee93a437f01a9f745e825b92b1a9f66fab143f5e060a01c706d180e8ee5449e4ca1a0eec591c71215df18ba3f49fab683124f577007 SHA512 02c51e755c98b2b51f11f1d34a88ec77f727f91d5af91426a5c625215ccde28ca57de356f2187df003fa2bc97ef9304a2d95b8aca0e71b999341b0d258e8bc0b AUX opencv-4.6.0-fix-ffmpeg-5.patch 547 BLAKE2B a1257e21f8c9ae5255c9155d913c304907e9af0ba8ca1d2d2bb10e2cbe54c9c98aa495f15c6c7b999bbfe2ee9c47c2898859ed84a121fcdd1549c1d122779cc4 SHA512 75961c301082e21839ade53a9ec3adb8d8f2b150e1a87effa7a0d7de3b30ccc7f1506d94311740fc2c933666317e32953626cdd38acecfaff854124b1f8d4163 +AUX opencv-4.8.0-arm64-fp16.patch 12222 BLAKE2B 18803d74cb57bbc303c8159ca69d24dcc47d00a314bfdababeadd3b85306d289c8b35a1106011086931396e0ab82d98964434d0c0eaa34c25451c766ca9b9199 SHA512 1d9d42a711a1ae0ec7f1f641a68271d4b36f6b8701997a05d65c633d3531cad75e4edbf71c4f80cc59a9a14baa8ddebf19081dc67b99898dce13da8e3746227e AUX opencv-4.8.0-fix-flatbuffer.patch 1764 BLAKE2B 806f61bf7017fbcb5b5058686db9dd2272eb61d6a2952f0f029bc76d62172a18f9a21661426f13cfd5066fc60710b218bb9b2dfded61e33040469da0e0b72c94 SHA512 2679f52c3df16b7daac1fafa0b24796568a2555de03805ee6ee6b2e08a2be9f63fca44c1b472f5ffe2dfb7fe8b3d754b3ba39a71419974613a9b3de70a09cefb AUX opencv-4.8.0-fix-protobuf.patch 543 BLAKE2B 2c3ac7ddeda366cb117d5deddefe732dd6918bc54e468b1cc8d95e40bb9e5cde1776046a894e8f26416edffa39dc6c97547cbd688e0566587f9690e721b4c1e1 SHA512 799e3f3d019624476a285c2795d28f022bb126e9b9f511b153aef131a7cae2e3fb6361bd21b7ea34dfea984e3ef9e1a0a40d09a5a72a808a31e1eb15548b15a7 DIST opencv-3.4.0-face_landmark_model.tar.gz 63299830 BLAKE2B 58f08cd8c030ee1c8b66e76a561fd625e112face427d001185f8d7c0eaace55adbd8474663021841a1382bfe393a210e64c51223441713cdb9156fac866a845c SHA512 2cd29ce42c08b2966db3ef4a2bc217ab3d07051819757ec6de8f3deea29e28f60abab39f101dba03f766f17018598e411bd687a7eaab0c681c113e10a2de2b23 @@ -20,6 +21,6 @@ DIST vgg_boostdesc-3.2.0.tar.gz 1867770 BLAKE2B 1fa5b58e73b6fa56ecf8d19af22298f7 EBUILD opencv-4.6.0-r4.ebuild 21277 BLAKE2B 458906fe516524a8d997e6645d16ca3621774795271cb06aa2bb414a5937139db84beffb1dfa39d96215f45aa56c93461463a62f5f771cfc16396b3d926e049b SHA512 2d2b557fe6b260cd4c3ffbd4159cac68dafb9fc22138ff2f119350723a44f3376ee39f20d8c8b2d61952d88d9a60b470d36477552f0a1eb9b12eccd690547e6e EBUILD opencv-4.7.0-r1.ebuild 21289 BLAKE2B 0e2c1ac23fcd0ec261329eeeb73f035bb92bdd0d114b6123a9934d85937034e65e22a5264553cf294c66023a23e7bab7bb77560bb422a0d36ed726bc010afdd8 SHA512 332206b352f57fd16375b104f84e3019a8358ed174159f0827f51ac522ccdb62c360fcea1a94c2333adae5468acb95e78ad6897c3e8f9daa33292161794b737e EBUILD opencv-4.7.0.ebuild 21242 BLAKE2B 930ce7cdb362134193217aced8fb53e6695cde0d2abf1582db923c5bad84c01189eea5d76dc6bfd81e1bbcd0e286f94c2d17b59bb3ace0dd2cdbf26e21be6d17 SHA512 43f45361d176e7f34dc0c1d98dc459b351215b9a2db8d76390b384e774a95db8792386fecd35007ffee90d2a275f3daea010da1cea5e9728221f71d6c6a44164 -EBUILD opencv-4.8.0-r1.ebuild 21456 BLAKE2B 49b088c4ec805c010825ed91cbfe89521e938c50c0fba33ccdc812b5f36a920c79e7865f988b7b1241b3ac6fc06036c439b7708562e928e969c61fd8d0207361 SHA512 20bf41d3edae8b8722636114934c180c3f08701aea55e7a7f5daae4924dd543f7f3c80ad2dae1ae78b7e075bad4f709cdfdcdccb78e818879f0f67f36982343f +EBUILD opencv-4.8.0-r1.ebuild 21500 BLAKE2B 0857dd0ebac0e6d9839031ec3c254d699923485d51cda623c38731ee356b57b6ce0f9401f0f1627db4ef9c769af0d86cc208145da89102c3b63179bf21c6002c SHA512 0b3a550504a762aa99b04ddd74adbd9069ca103bc6de47d03987ffced8e8a5188eeea62e78737a38339e98e03700fe0944cefee1cff3504967fcfadfd317336e EBUILD opencv-4.8.0.ebuild 21266 BLAKE2B f7eef7599c1dba1eeafc61ff2717d47229c64ce3c9592da01a2ac8d0e75e6c7158c1b66346db77d969e573d5f7e50880b42827b185184c5fa571b7cc7b4dc747 SHA512 a47347e0966585c4b5789ddb11373e5b9b7559e26a8ab401de25b2f427b85cdbf7a9d47ea53d654e79bd44fece38d45f437ad91bdbe872410b9cd881192d3055 MISC metadata.xml 3147 BLAKE2B 37bb20a0fd752e1e3d4fae7f1b549be49648cb45cc3c6537a9bdb01b609cbab68a443bc17ded790bedd3afa800d1ce16c4220ebdd42a0d9dd73dacf415d29b5f SHA512 e9cf8dc6a63be3708ee595149122ffb2ef9722402716ac4a100c971a39c17e82e3d32253c03f56dd4814202a0727b29d8fb0879605a71c15e0a3f8e01e152069 diff --git a/media-libs/opencv/files/opencv-4.8.0-arm64-fp16.patch b/media-libs/opencv/files/opencv-4.8.0-arm64-fp16.patch new file mode 100644 index 000000000000..6bf04daf58ae --- /dev/null +++ b/media-libs/opencv/files/opencv-4.8.0-arm64-fp16.patch @@ -0,0 +1,272 @@ +https://github.com/opencv/opencv/pull/24203 + +From 689fa6f372975d58e9f50fd17a0abd105b1815f1 Mon Sep 17 00:00:00 2001 +From: Sam James +Date: Mon, 28 Aug 2023 04:20:58 +0100 +Subject: [PATCH] Fix compilation on arm64 with FP16 when disabled + +If building with -mcpu=native or any other setting which implies the current +CPU has FP16 but with intrinsics disabled, we mistakenly try to use it even +though convolution.hpp conditionally defines it correctly based on whether +we should *use it*. convolution.cpp on the other hand was mismatched and +trying to use it if the CPU supported it, even if not enabled in the build +system. + +Make the guards match. + +Bug: https://bugs.gentoo.org/913031 +Signed-off-by: Sam James +--- a/modules/dnn/src/layers/cpu_kernels/convolution.cpp ++++ b/modules/dnn/src/layers/cpu_kernels/convolution.cpp +@@ -118,7 +118,7 @@ Ptr initFastConv( + const size_t wstep = weightsMat.step1(); + + conv->useFP16 = false; +-#ifdef CONV_ARM_FP16 ++#if defined(CONV_ARM_FP16) && CV_FP16 + // TODO: add FP16 support for Winograd. + if (_useFP16 && (conv->conv_type == CONV_TYPE_GENERIC || conv->conv_type == CONV_TYPE_DEPTHWISE_REMAIN)) + conv->useFP16 = true; +@@ -137,7 +137,7 @@ Ptr initFastConv( + int padded_ksize = ((ksize + VEC_ALIGN-1) / VEC_ALIGN) * VEC_ALIGN; + int nweights = C * padded_ksize; + +-#ifdef CONV_ARM_FP16 ++#if defined(CONV_ARM_FP16) && CV_FP16 + if (conv->useFP16) + { + conv->weightsBuf_FP16.resize(nweights + VEC_ALIGN); +@@ -190,7 +190,7 @@ Ptr initFastConv( + #endif + const int CONV_WINO_NATOMS_F32 = CONV_WINO_AREA / CONV_WINO_ATOM_F32; // for AVX2, it is 8, otherwise, it's 16. + +-#ifdef CONV_ARM_FP16 ++#if defined(CONV_ARM_FP16) && CV_FP16 + // FP 16 + const int CONV_WINO_ATOM_F16 = CONV_WINO_ATOM_F32 * 2; + const int CONV_WINO_NATOMS_F16 = CONV_WINO_AREA / CONV_WINO_ATOM_F16; +@@ -208,7 +208,7 @@ Ptr initFastConv( + size_t nweights = ngroups*Kg_nblocks*Cg*CONV_WINO_KBLOCK*CONV_WINO_AREA; + + float* wptrWino = nullptr; +-#ifdef CONV_ARM_FP16 ++#if defined(CONV_ARM_FP16) && CV_FP16 + float16_t* wptrWino_FP16 = nullptr; + if (conv->useFP16) + { +@@ -264,7 +264,7 @@ Ptr initFastConv( + } + + // repack the data. +-#ifdef CONV_ARM_FP16 ++#if defined(CONV_ARM_FP16) && CV_FP16 + if (conv->useFP16) + { + float16_t* wptr = wptrWino_FP16 + (g*Kg_nblocks + ki) * Cg *CONV_WINO_KBLOCK*CONV_WINO_AREA + +@@ -308,7 +308,7 @@ Ptr initFastConv( + + float* weightsBufPtr = nullptr; + +-#ifdef CONV_ARM_FP16 ++#if defined(CONV_ARM_FP16) && CV_FP16 + int numStripsMR_FP16 = (Kg + CONV_MR_FP16 - 1) / CONV_MR_FP16; + int Kg_aligned_FP16 = numStripsMR_FP16 * CONV_MR_FP16; + size_t nweights_FP16 = ngroups * Kg_aligned_FP16 * DkHkWkCg; +@@ -331,7 +331,7 @@ Ptr initFastConv( + } + + // Pack the weight. +-#ifdef CONV_ARM_FP16 ++#if defined(CONV_ARM_FP16) && CV_FP16 + if (conv->useFP16) + { + parallel_for_(Range(0, ngroups * numStripsMR_FP16), [&](const Range& r0){ +@@ -415,7 +415,7 @@ static inline void packData8(char*& inpbuf, float*& inptrIn, int& in_w, int& x0, + char * inpbufC = inpbuf + s0 * esz; + float* inptrInC = (float* )inptrIn; + +-#ifdef CONV_ARM_FP16 ++#if defined(CONV_ARM_FP16) && CV_FP16 + float16_t* inpbufC_FP16 = (float16_t *)inpbufC; + if (esz == sizeof(float16_t)) + { +@@ -521,7 +521,7 @@ static inline void packData2(char *& inpbuf, float*& inptrIn, int& in_w, int& x0 + char* inpbufC = inpbuf + s0 * esz; + float* inptrInC = inptrIn; + +-#ifdef CONV_ARM_FP16 ++#if defined(CONV_ARM_FP16) && CV_FP16 + float16_t* inpbufC_FP16 = (float16_t *)inpbufC; + if (esz == sizeof(float16_t)) + { +@@ -553,7 +553,7 @@ static inline void packData2(char *& inpbuf, float*& inptrIn, int& in_w, int& x0 + in_w += stride_w; + } + +-#ifdef CONV_ARM_FP16 ++#if defined(CONV_ARM_FP16) && CV_FP16 + // Fast convert float 32 to float16 + static inline void _cvt32f16f( const float* src, float16_t* dst, int len) + { +@@ -623,7 +623,7 @@ static inline void packInputData(char* inpbuf_task, float* inp, const int* ofsta + { + // Make special branch where memcpy() is called with a constant buffer size. + // Compilers will likely unroll this loop properly. +-#ifdef CONV_ARM_FP16 ++#if defined(CONV_ARM_FP16) && CV_FP16 + if (useFP16) + { + for (int c = 0; c < Cg; c++, inptr += inp_planesize, inpbuf += CONV_NR_esz) +@@ -636,7 +636,7 @@ static inline void packInputData(char* inpbuf_task, float* inp, const int* ofsta + } + else + { +-#ifdef CONV_ARM_FP16 ++#if defined(CONV_ARM_FP16) && CV_FP16 + if (useFP16) + { + for (int c = 0; c < Cg; c++, inptr += inp_planesize, inpbuf += CONV_NR_esz) +@@ -700,7 +700,7 @@ static inline void packInputData(char* inpbuf_task, float* inp, const int* ofsta + int w0 = std::max(0, (-in_w + dilation_w-1)/dilation_w); + int w1 = std::min(Wk, (Wi - in_w + dilation_w-1)/dilation_w); + const float* inptrInC = inptrIn; +-#ifdef CONV_ARM_FP16 ++#if defined(CONV_ARM_FP16) && CV_FP16 + if (useFP16) + { + float16_t* inpbufC = (float16_t *)inpbuf + s0; +@@ -761,7 +761,7 @@ static inline void packInputData(char* inpbuf_task, float* inp, const int* ofsta + int w1 = std::min(Wk, (Wi - in_w + dilation_w-1)/dilation_w); + + const float* inptrInC = inptrIn; +-#ifdef CONV_ARM_FP16 ++#if defined(CONV_ARM_FP16) && CV_FP16 + if (useFP16) + { + float16_t* inpbufC = (float16_t *)inpbuf + s0; +@@ -834,7 +834,7 @@ static inline void packInputData(char* inpbuf_task, float* inp, const int* ofsta + int w0 = std::max(0, (-in_w + dilation_w-1)/dilation_w); + int w1 = std::min(Wk, (Wi - in_w + dilation_w-1)/dilation_w); + const float* inptrInC = inptrIn; +-#ifdef CONV_ARM_FP16 ++#if defined(CONV_ARM_FP16) && CV_FP16 + if (useFP16) + { + float16_t* inpbufC = (float16_t* )inpbuf + s0; +@@ -887,7 +887,7 @@ static inline void packInputData(char* inpbuf_task, float* inp, const int* ofsta + for (; i < CONV_NR;) + { + float* inpbuf_ki = (float* )inpbuf + k * CONV_NR * Cg + i; +-#ifdef CONV_ARM_FP16 ++#if defined(CONV_ARM_FP16) && CV_FP16 + float16_t * inpbuf_ki_FP16 = (float16_t *)inpbuf + k * CONV_NR * Cg + i; + #endif + +@@ -903,7 +903,7 @@ static inline void packInputData(char* inpbuf_task, float* inp, const int* ofsta + { + if (stride_w == 1) + { +-#ifdef CONV_ARM_FP16 ++#if defined(CONV_ARM_FP16) && CV_FP16 + if (useFP16) + { + for (int c = 0; c < Cg; c++, inpbuf_ki_FP16 += CONV_NR, inptr_ki += inp_planesize) +@@ -934,7 +934,7 @@ static inline void packInputData(char* inpbuf_task, float* inp, const int* ofsta + } + else if (stride_w == 2) + { +-#ifdef CONV_ARM_FP16 ++#if defined(CONV_ARM_FP16) && CV_FP16 + if (useFP16) + { + for (int c = 0; c < Cg; c++, inpbuf_ki_FP16 += CONV_NR, inptr_ki += inp_planesize) +@@ -967,7 +967,7 @@ static inline void packInputData(char* inpbuf_task, float* inp, const int* ofsta + } + else + { +-#ifdef CONV_ARM_FP16 ++#if defined(CONV_ARM_FP16) && CV_FP16 + if (useFP16) + { + for (int c = 0; c < Cg; c++, inpbuf_ki_FP16 += CONV_NR, inptr_ki += inp_planesize) +@@ -1006,7 +1006,7 @@ static inline void packInputData(char* inpbuf_task, float* inp, const int* ofsta + { + if (stride_w == 1) + { +-#ifdef CONV_ARM_FP16 ++#if defined(CONV_ARM_FP16) && CV_FP16 + if (useFP16) + { + for (int c = 0; c < Cg; c++, inpbuf_ki_FP16 += CONV_NR, inptr_ki += inp_planesize) +@@ -1029,7 +1029,7 @@ static inline void packInputData(char* inpbuf_task, float* inp, const int* ofsta + } + else + { +-#ifdef CONV_ARM_FP16 ++#if defined(CONV_ARM_FP16) && CV_FP16 + if (useFP16) + { + for (int c = 0; c < Cg; c++, inpbuf_ki_FP16 += CONV_NR, inptr_ki += inp_planesize) +@@ -1057,7 +1057,7 @@ static inline void packInputData(char* inpbuf_task, float* inp, const int* ofsta + } + else + { +-#ifdef CONV_ARM_FP16 ++#if defined(CONV_ARM_FP16) && CV_FP16 + if (useFP16) + { + for (int c = 0; c < Cg; c++, inpbuf_ki_FP16 += CONV_NR, inptr_ki += inp_planesize) +@@ -1073,7 +1073,7 @@ static inline void packInputData(char* inpbuf_task, float* inp, const int* ofsta + } + else + { +-#ifdef CONV_ARM_FP16 ++#if defined(CONV_ARM_FP16) && CV_FP16 + if (useFP16) + { + for (int c = 0; c < Cg; c++, inpbuf_ki_FP16 += CONV_NR) +@@ -1260,7 +1260,7 @@ void runFastConv(InputArray _input, OutputArray _output, const Ptr& co + int CONV_MR = CONV_MR_FP32; + int esz = sizeof(float ); + +-#ifdef CONV_ARM_FP16 ++#if defined(CONV_ARM_FP16) && CV_FP16 + if (useFP16) + { + // works at FP 16. +@@ -1433,7 +1433,7 @@ void runFastConv(InputArray _input, OutputArray _output, const Ptr& co + } + + char *weights = nullptr; +-#ifdef CONV_ARM_FP16 ++#if defined(CONV_ARM_FP16) && CV_FP16 + if (useFP16) + { + CV_Assert(!conv->weightsBuf_FP16.empty()); +@@ -1474,7 +1474,7 @@ void runFastConv(InputArray _input, OutputArray _output, const Ptr& co + #if CV_NEON && CV_NEON_AARCH64 + if (conv->useNEON) + { +-#ifdef CONV_ARM_FP16 ++#if defined(CONV_ARM_FP16) && CV_FP16 + if (useFP16) + { + opt_NEON::convBlockMR1_FP16(DkHkWkCg, weights, inptr, cptr, biasVal, fusedAdd, minval, maxval, ifMinMaxAct, outLen, CONV_NR); +@@ -1537,7 +1537,7 @@ void runFastConv(InputArray _input, OutputArray _output, const Ptr& co + #if CV_NEON + if (conv->useNEON) + { +-#ifdef CONV_ARM_FP16 ++#if defined(CONV_ARM_FP16) && CV_FP16 + if (useFP16) + { + opt_NEON::convBlock_FP16(c1 - c0, wptr, inptr, (char *)cptr_f16, ldc, c0 == 0, outLen, CONV_MR, CONV_NR); +@@ -1567,7 +1567,7 @@ void runFastConv(InputArray _input, OutputArray _output, const Ptr& co + float biasval = biasptr[k]; + int j = 0; + +-#ifdef CONV_ARM_FP16 ++#if defined(CONV_ARM_FP16) && CV_FP16 + if (useFP16) + { + float32x4_t vbias = vdupq_n_f32(biasval); diff --git a/media-libs/opencv/opencv-4.8.0-r1.ebuild b/media-libs/opencv/opencv-4.8.0-r1.ebuild index 846e57c7514b..27cec3eb3fa4 100644 --- a/media-libs/opencv/opencv-4.8.0-r1.ebuild +++ b/media-libs/opencv/opencv-4.8.0-r1.ebuild @@ -294,6 +294,7 @@ PATCHES=( "${FILESDIR}"/${PN}-4.5.0-link-with-cblas-for-lapack.patch "${FILESDIR}"/${PN}-4.8.0-fix-protobuf.patch "${FILESDIR}"/${PN}-4.8.0-fix-flatbuffer.patch + "${FILESDIR}"/${PN}-4.8.0-arm64-fp16.patch ) pkg_pretend() { -- cgit v1.2.3