From 9967860c0cd26c0940ee4b9ab6698e00e5b1260a Mon Sep 17 00:00:00 2001
From: V3n3RiX <venerix@koprulu.sector>
Date: Mon, 28 Aug 2023 10:02:11 +0100
Subject: gentoo auto-resync : 28:08:2023 - 10:02:11

---
 media-libs/Manifest.gz                             | Bin 68866 -> 68871 bytes
 media-libs/opencv/Manifest                         |   3 +-
 .../opencv/files/opencv-4.8.0-arm64-fp16.patch     | 272 +++++++++++++++++++++
 media-libs/opencv/opencv-4.8.0-r1.ebuild           |   1 +
 4 files changed, 275 insertions(+), 1 deletion(-)
 create mode 100644 media-libs/opencv/files/opencv-4.8.0-arm64-fp16.patch

(limited to 'media-libs')

diff --git a/media-libs/Manifest.gz b/media-libs/Manifest.gz
index ae7d1322efaf..78e20c77407a 100644
Binary files a/media-libs/Manifest.gz and b/media-libs/Manifest.gz differ
diff --git a/media-libs/opencv/Manifest b/media-libs/opencv/Manifest
index eb6f92a9b88d..83626ac835bf 100644
--- a/media-libs/opencv/Manifest
+++ b/media-libs/opencv/Manifest
@@ -5,6 +5,7 @@ AUX opencv-4.4.0-disable-native-cpuflag-detect.patch 1218 BLAKE2B 06569f6b7d33ec
 AUX opencv-4.5.0-link-with-cblas-for-lapack.patch 726 BLAKE2B adbffd961c88229bf810e13fe20705352ff506a653c1d55bb0687c03d8daf9e05aed0f2ba9cd6e306c625837d6ddf64eeb99dc6cbf2b15fae833f4a7eea6aad0 SHA512 4c3108e304721c2cd78eb82f5d7bccc18831db5f47bc628a98ae6a37da389deef30e7e9b6dc5644e3bc0e0fafdd907dc37822a58a25555999698527d2ac364b0
 AUX opencv-4.6.0-fix-build-examples.patch 700 BLAKE2B 4560255001cde7e8071d2ee93a437f01a9f745e825b92b1a9f66fab143f5e060a01c706d180e8ee5449e4ca1a0eec591c71215df18ba3f49fab683124f577007 SHA512 02c51e755c98b2b51f11f1d34a88ec77f727f91d5af91426a5c625215ccde28ca57de356f2187df003fa2bc97ef9304a2d95b8aca0e71b999341b0d258e8bc0b
 AUX opencv-4.6.0-fix-ffmpeg-5.patch 547 BLAKE2B a1257e21f8c9ae5255c9155d913c304907e9af0ba8ca1d2d2bb10e2cbe54c9c98aa495f15c6c7b999bbfe2ee9c47c2898859ed84a121fcdd1549c1d122779cc4 SHA512 75961c301082e21839ade53a9ec3adb8d8f2b150e1a87effa7a0d7de3b30ccc7f1506d94311740fc2c933666317e32953626cdd38acecfaff854124b1f8d4163
+AUX opencv-4.8.0-arm64-fp16.patch 12222 BLAKE2B 18803d74cb57bbc303c8159ca69d24dcc47d00a314bfdababeadd3b85306d289c8b35a1106011086931396e0ab82d98964434d0c0eaa34c25451c766ca9b9199 SHA512 1d9d42a711a1ae0ec7f1f641a68271d4b36f6b8701997a05d65c633d3531cad75e4edbf71c4f80cc59a9a14baa8ddebf19081dc67b99898dce13da8e3746227e
 AUX opencv-4.8.0-fix-flatbuffer.patch 1764 BLAKE2B 806f61bf7017fbcb5b5058686db9dd2272eb61d6a2952f0f029bc76d62172a18f9a21661426f13cfd5066fc60710b218bb9b2dfded61e33040469da0e0b72c94 SHA512 2679f52c3df16b7daac1fafa0b24796568a2555de03805ee6ee6b2e08a2be9f63fca44c1b472f5ffe2dfb7fe8b3d754b3ba39a71419974613a9b3de70a09cefb
 AUX opencv-4.8.0-fix-protobuf.patch 543 BLAKE2B 2c3ac7ddeda366cb117d5deddefe732dd6918bc54e468b1cc8d95e40bb9e5cde1776046a894e8f26416edffa39dc6c97547cbd688e0566587f9690e721b4c1e1 SHA512 799e3f3d019624476a285c2795d28f022bb126e9b9f511b153aef131a7cae2e3fb6361bd21b7ea34dfea984e3ef9e1a0a40d09a5a72a808a31e1eb15548b15a7
 DIST opencv-3.4.0-face_landmark_model.tar.gz 63299830 BLAKE2B 58f08cd8c030ee1c8b66e76a561fd625e112face427d001185f8d7c0eaace55adbd8474663021841a1382bfe393a210e64c51223441713cdb9156fac866a845c SHA512 2cd29ce42c08b2966db3ef4a2bc217ab3d07051819757ec6de8f3deea29e28f60abab39f101dba03f766f17018598e411bd687a7eaab0c681c113e10a2de2b23
@@ -20,6 +21,6 @@ DIST vgg_boostdesc-3.2.0.tar.gz 1867770 BLAKE2B 1fa5b58e73b6fa56ecf8d19af22298f7
 EBUILD opencv-4.6.0-r4.ebuild 21277 BLAKE2B 458906fe516524a8d997e6645d16ca3621774795271cb06aa2bb414a5937139db84beffb1dfa39d96215f45aa56c93461463a62f5f771cfc16396b3d926e049b SHA512 2d2b557fe6b260cd4c3ffbd4159cac68dafb9fc22138ff2f119350723a44f3376ee39f20d8c8b2d61952d88d9a60b470d36477552f0a1eb9b12eccd690547e6e
 EBUILD opencv-4.7.0-r1.ebuild 21289 BLAKE2B 0e2c1ac23fcd0ec261329eeeb73f035bb92bdd0d114b6123a9934d85937034e65e22a5264553cf294c66023a23e7bab7bb77560bb422a0d36ed726bc010afdd8 SHA512 332206b352f57fd16375b104f84e3019a8358ed174159f0827f51ac522ccdb62c360fcea1a94c2333adae5468acb95e78ad6897c3e8f9daa33292161794b737e
 EBUILD opencv-4.7.0.ebuild 21242 BLAKE2B 930ce7cdb362134193217aced8fb53e6695cde0d2abf1582db923c5bad84c01189eea5d76dc6bfd81e1bbcd0e286f94c2d17b59bb3ace0dd2cdbf26e21be6d17 SHA512 43f45361d176e7f34dc0c1d98dc459b351215b9a2db8d76390b384e774a95db8792386fecd35007ffee90d2a275f3daea010da1cea5e9728221f71d6c6a44164
-EBUILD opencv-4.8.0-r1.ebuild 21456 BLAKE2B 49b088c4ec805c010825ed91cbfe89521e938c50c0fba33ccdc812b5f36a920c79e7865f988b7b1241b3ac6fc06036c439b7708562e928e969c61fd8d0207361 SHA512 20bf41d3edae8b8722636114934c180c3f08701aea55e7a7f5daae4924dd543f7f3c80ad2dae1ae78b7e075bad4f709cdfdcdccb78e818879f0f67f36982343f
+EBUILD opencv-4.8.0-r1.ebuild 21500 BLAKE2B 0857dd0ebac0e6d9839031ec3c254d699923485d51cda623c38731ee356b57b6ce0f9401f0f1627db4ef9c769af0d86cc208145da89102c3b63179bf21c6002c SHA512 0b3a550504a762aa99b04ddd74adbd9069ca103bc6de47d03987ffced8e8a5188eeea62e78737a38339e98e03700fe0944cefee1cff3504967fcfadfd317336e
 EBUILD opencv-4.8.0.ebuild 21266 BLAKE2B f7eef7599c1dba1eeafc61ff2717d47229c64ce3c9592da01a2ac8d0e75e6c7158c1b66346db77d969e573d5f7e50880b42827b185184c5fa571b7cc7b4dc747 SHA512 a47347e0966585c4b5789ddb11373e5b9b7559e26a8ab401de25b2f427b85cdbf7a9d47ea53d654e79bd44fece38d45f437ad91bdbe872410b9cd881192d3055
 MISC metadata.xml 3147 BLAKE2B 37bb20a0fd752e1e3d4fae7f1b549be49648cb45cc3c6537a9bdb01b609cbab68a443bc17ded790bedd3afa800d1ce16c4220ebdd42a0d9dd73dacf415d29b5f SHA512 e9cf8dc6a63be3708ee595149122ffb2ef9722402716ac4a100c971a39c17e82e3d32253c03f56dd4814202a0727b29d8fb0879605a71c15e0a3f8e01e152069
diff --git a/media-libs/opencv/files/opencv-4.8.0-arm64-fp16.patch b/media-libs/opencv/files/opencv-4.8.0-arm64-fp16.patch
new file mode 100644
index 000000000000..6bf04daf58ae
--- /dev/null
+++ b/media-libs/opencv/files/opencv-4.8.0-arm64-fp16.patch
@@ -0,0 +1,272 @@
+https://github.com/opencv/opencv/pull/24203
+
+From 689fa6f372975d58e9f50fd17a0abd105b1815f1 Mon Sep 17 00:00:00 2001
+From: Sam James <sam@gentoo.org>
+Date: Mon, 28 Aug 2023 04:20:58 +0100
+Subject: [PATCH] Fix compilation on arm64 with FP16 when disabled
+
+If building with -mcpu=native or any other setting which implies the current
+CPU has FP16 but with intrinsics disabled, we mistakenly try to use it even
+though convolution.hpp conditionally defines it correctly based on whether
+we should *use it*. convolution.cpp on the other hand was mismatched and
+trying to use it if the CPU supported it, even if not enabled in the build
+system.
+
+Make the guards match.
+
+Bug: https://bugs.gentoo.org/913031
+Signed-off-by: Sam James <sam@gentoo.org>
+--- a/modules/dnn/src/layers/cpu_kernels/convolution.cpp
++++ b/modules/dnn/src/layers/cpu_kernels/convolution.cpp
+@@ -118,7 +118,7 @@ Ptr<FastConv> initFastConv(
+     const size_t wstep = weightsMat.step1();
+ 
+     conv->useFP16 = false;
+-#ifdef CONV_ARM_FP16
++#if defined(CONV_ARM_FP16) && CV_FP16
+     // TODO: add FP16 support for Winograd.
+     if (_useFP16 && (conv->conv_type == CONV_TYPE_GENERIC || conv->conv_type == CONV_TYPE_DEPTHWISE_REMAIN))
+         conv->useFP16 = true;
+@@ -137,7 +137,7 @@ Ptr<FastConv> initFastConv(
+         int padded_ksize = ((ksize + VEC_ALIGN-1) / VEC_ALIGN) * VEC_ALIGN;
+         int nweights = C * padded_ksize;
+ 
+-#ifdef CONV_ARM_FP16
++#if defined(CONV_ARM_FP16) && CV_FP16
+         if (conv->useFP16)
+         {
+             conv->weightsBuf_FP16.resize(nweights + VEC_ALIGN);
+@@ -190,7 +190,7 @@ Ptr<FastConv> initFastConv(
+ #endif
+         const int CONV_WINO_NATOMS_F32 = CONV_WINO_AREA / CONV_WINO_ATOM_F32; // for AVX2, it is 8, otherwise, it's 16.
+ 
+-#ifdef CONV_ARM_FP16
++#if defined(CONV_ARM_FP16) && CV_FP16
+         // FP 16
+         const int CONV_WINO_ATOM_F16 = CONV_WINO_ATOM_F32 * 2;
+         const int CONV_WINO_NATOMS_F16 = CONV_WINO_AREA / CONV_WINO_ATOM_F16;
+@@ -208,7 +208,7 @@ Ptr<FastConv> initFastConv(
+         size_t nweights = ngroups*Kg_nblocks*Cg*CONV_WINO_KBLOCK*CONV_WINO_AREA;
+ 
+         float* wptrWino = nullptr;
+-#ifdef CONV_ARM_FP16
++#if defined(CONV_ARM_FP16) && CV_FP16
+         float16_t* wptrWino_FP16 = nullptr;
+         if (conv->useFP16)
+         {
+@@ -264,7 +264,7 @@ Ptr<FastConv> initFastConv(
+                 }
+ 
+                 // repack the data.
+-#ifdef CONV_ARM_FP16
++#if defined(CONV_ARM_FP16) && CV_FP16
+                 if (conv->useFP16)
+                 {
+                     float16_t* wptr = wptrWino_FP16 + (g*Kg_nblocks + ki) * Cg *CONV_WINO_KBLOCK*CONV_WINO_AREA +
+@@ -308,7 +308,7 @@ Ptr<FastConv> initFastConv(
+ 
+         float* weightsBufPtr = nullptr;
+ 
+-#ifdef CONV_ARM_FP16
++#if defined(CONV_ARM_FP16) && CV_FP16
+         int numStripsMR_FP16 = (Kg + CONV_MR_FP16 - 1) / CONV_MR_FP16;
+         int Kg_aligned_FP16 = numStripsMR_FP16 * CONV_MR_FP16;
+         size_t nweights_FP16 = ngroups * Kg_aligned_FP16 * DkHkWkCg;
+@@ -331,7 +331,7 @@ Ptr<FastConv> initFastConv(
+         }
+ 
+         // Pack the weight.
+-#ifdef CONV_ARM_FP16
++#if defined(CONV_ARM_FP16) && CV_FP16
+         if (conv->useFP16)
+         {
+             parallel_for_(Range(0, ngroups * numStripsMR_FP16), [&](const Range& r0){
+@@ -415,7 +415,7 @@ static inline void packData8(char*& inpbuf, float*& inptrIn, int& in_w, int& x0,
+     char * inpbufC = inpbuf + s0 * esz;
+     float* inptrInC = (float* )inptrIn;
+ 
+-#ifdef CONV_ARM_FP16
++#if defined(CONV_ARM_FP16) && CV_FP16
+     float16_t* inpbufC_FP16 = (float16_t *)inpbufC;
+     if (esz == sizeof(float16_t))
+     {
+@@ -521,7 +521,7 @@ static inline void packData2(char *& inpbuf, float*& inptrIn, int& in_w, int& x0
+     char* inpbufC = inpbuf + s0 * esz;
+     float* inptrInC = inptrIn;
+ 
+-#ifdef CONV_ARM_FP16
++#if defined(CONV_ARM_FP16) && CV_FP16
+     float16_t* inpbufC_FP16 = (float16_t *)inpbufC;
+     if (esz == sizeof(float16_t))
+     {
+@@ -553,7 +553,7 @@ static inline void packData2(char *& inpbuf, float*& inptrIn, int& in_w, int& x0
+     in_w += stride_w;
+ }
+ 
+-#ifdef CONV_ARM_FP16
++#if defined(CONV_ARM_FP16) && CV_FP16
+ // Fast convert float 32 to float16
+ static inline void _cvt32f16f( const float* src, float16_t* dst, int len)
+ {
+@@ -623,7 +623,7 @@ static inline void packInputData(char* inpbuf_task, float* inp, const int* ofsta
+             {
+                 // Make special branch where memcpy() is called with a constant buffer size.
+                 // Compilers will likely unroll this loop properly.
+-#ifdef CONV_ARM_FP16
++#if defined(CONV_ARM_FP16) && CV_FP16
+                 if (useFP16)
+                 {
+                     for (int c = 0; c < Cg; c++, inptr += inp_planesize, inpbuf += CONV_NR_esz)
+@@ -636,7 +636,7 @@ static inline void packInputData(char* inpbuf_task, float* inp, const int* ofsta
+             }
+             else
+             {
+-#ifdef CONV_ARM_FP16
++#if defined(CONV_ARM_FP16) && CV_FP16
+                 if (useFP16)
+                 {
+                     for (int c = 0; c < Cg; c++, inptr += inp_planesize, inpbuf += CONV_NR_esz)
+@@ -700,7 +700,7 @@ static inline void packInputData(char* inpbuf_task, float* inp, const int* ofsta
+                             int w0 = std::max(0, (-in_w + dilation_w-1)/dilation_w);
+                             int w1 = std::min(Wk, (Wi - in_w + dilation_w-1)/dilation_w);
+                             const float* inptrInC = inptrIn;
+-#ifdef CONV_ARM_FP16
++#if defined(CONV_ARM_FP16) && CV_FP16
+                             if (useFP16)
+                             {
+                                 float16_t* inpbufC = (float16_t *)inpbuf + s0;
+@@ -761,7 +761,7 @@ static inline void packInputData(char* inpbuf_task, float* inp, const int* ofsta
+                             int w1 = std::min(Wk, (Wi - in_w + dilation_w-1)/dilation_w);
+ 
+                             const float* inptrInC = inptrIn;
+-#ifdef CONV_ARM_FP16
++#if defined(CONV_ARM_FP16) && CV_FP16
+                             if (useFP16)
+                             {
+                                 float16_t* inpbufC = (float16_t *)inpbuf + s0;
+@@ -834,7 +834,7 @@ static inline void packInputData(char* inpbuf_task, float* inp, const int* ofsta
+                             int w0 = std::max(0, (-in_w + dilation_w-1)/dilation_w);
+                             int w1 = std::min(Wk, (Wi - in_w + dilation_w-1)/dilation_w);
+                             const float* inptrInC = inptrIn;
+-#ifdef CONV_ARM_FP16
++#if defined(CONV_ARM_FP16) && CV_FP16
+                             if (useFP16)
+                             {
+                                 float16_t* inpbufC = (float16_t* )inpbuf + s0;
+@@ -887,7 +887,7 @@ static inline void packInputData(char* inpbuf_task, float* inp, const int* ofsta
+                 for (; i < CONV_NR;)
+                 {
+                     float* inpbuf_ki = (float* )inpbuf + k * CONV_NR * Cg + i;
+-#ifdef CONV_ARM_FP16
++#if defined(CONV_ARM_FP16) && CV_FP16
+                     float16_t * inpbuf_ki_FP16 = (float16_t *)inpbuf + k * CONV_NR * Cg + i;
+ #endif
+ 
+@@ -903,7 +903,7 @@ static inline void packInputData(char* inpbuf_task, float* inp, const int* ofsta
+                         {
+                             if (stride_w == 1)
+                             {
+-#ifdef CONV_ARM_FP16
++#if defined(CONV_ARM_FP16) && CV_FP16
+                                 if (useFP16)
+                                 {
+                                     for (int c = 0; c < Cg; c++, inpbuf_ki_FP16 += CONV_NR, inptr_ki += inp_planesize)
+@@ -934,7 +934,7 @@ static inline void packInputData(char* inpbuf_task, float* inp, const int* ofsta
+                             }
+                             else if (stride_w == 2)
+                             {
+-#ifdef CONV_ARM_FP16
++#if defined(CONV_ARM_FP16) && CV_FP16
+                                 if (useFP16)
+                                 {
+                                     for (int c = 0; c < Cg; c++, inpbuf_ki_FP16 += CONV_NR, inptr_ki += inp_planesize)
+@@ -967,7 +967,7 @@ static inline void packInputData(char* inpbuf_task, float* inp, const int* ofsta
+                             }
+                             else
+                             {
+-#ifdef CONV_ARM_FP16
++#if defined(CONV_ARM_FP16) && CV_FP16
+                                 if (useFP16)
+                                 {
+                                     for (int c = 0; c < Cg; c++, inpbuf_ki_FP16 += CONV_NR, inptr_ki += inp_planesize)
+@@ -1006,7 +1006,7 @@ static inline void packInputData(char* inpbuf_task, float* inp, const int* ofsta
+                         {
+                             if (stride_w == 1)
+                             {
+-#ifdef CONV_ARM_FP16
++#if defined(CONV_ARM_FP16) && CV_FP16
+                                 if (useFP16)
+                                 {
+                                     for (int c = 0; c < Cg; c++, inpbuf_ki_FP16 += CONV_NR, inptr_ki += inp_planesize)
+@@ -1029,7 +1029,7 @@ static inline void packInputData(char* inpbuf_task, float* inp, const int* ofsta
+                             }
+                             else
+                             {
+-#ifdef CONV_ARM_FP16
++#if defined(CONV_ARM_FP16) && CV_FP16
+                                 if (useFP16)
+                                 {
+                                     for (int c = 0; c < Cg; c++, inpbuf_ki_FP16 += CONV_NR, inptr_ki += inp_planesize)
+@@ -1057,7 +1057,7 @@ static inline void packInputData(char* inpbuf_task, float* inp, const int* ofsta
+                         }
+                         else
+                         {
+-#ifdef CONV_ARM_FP16
++#if defined(CONV_ARM_FP16) && CV_FP16
+                             if (useFP16)
+                             {
+                                 for (int c = 0; c < Cg; c++, inpbuf_ki_FP16 += CONV_NR, inptr_ki += inp_planesize)
+@@ -1073,7 +1073,7 @@ static inline void packInputData(char* inpbuf_task, float* inp, const int* ofsta
+                     }
+                     else
+                     {
+-#ifdef CONV_ARM_FP16
++#if defined(CONV_ARM_FP16) && CV_FP16
+                         if (useFP16)
+                         {
+                             for (int c = 0; c < Cg; c++, inpbuf_ki_FP16 += CONV_NR)
+@@ -1260,7 +1260,7 @@ void runFastConv(InputArray _input, OutputArray _output, const Ptr<FastConv>& co
+     int CONV_MR = CONV_MR_FP32;
+     int esz = sizeof(float );
+ 
+-#ifdef CONV_ARM_FP16
++#if defined(CONV_ARM_FP16) && CV_FP16
+     if (useFP16)
+     {
+         // works at FP 16.
+@@ -1433,7 +1433,7 @@ void runFastConv(InputArray _input, OutputArray _output, const Ptr<FastConv>& co
+                 }
+ 
+                 char *weights = nullptr;
+-#ifdef CONV_ARM_FP16
++#if defined(CONV_ARM_FP16) && CV_FP16
+                 if (useFP16)
+                 {
+                     CV_Assert(!conv->weightsBuf_FP16.empty());
+@@ -1474,7 +1474,7 @@ void runFastConv(InputArray _input, OutputArray _output, const Ptr<FastConv>& co
+ #if CV_NEON && CV_NEON_AARCH64
+                         if (conv->useNEON)
+                         {
+-#ifdef CONV_ARM_FP16
++#if defined(CONV_ARM_FP16) && CV_FP16
+                             if (useFP16)
+                             {
+                                 opt_NEON::convBlockMR1_FP16(DkHkWkCg, weights, inptr, cptr, biasVal, fusedAdd, minval, maxval, ifMinMaxAct, outLen, CONV_NR);
+@@ -1537,7 +1537,7 @@ void runFastConv(InputArray _input, OutputArray _output, const Ptr<FastConv>& co
+ #if CV_NEON
+                                 if (conv->useNEON)
+                                 {
+-#ifdef CONV_ARM_FP16
++#if defined(CONV_ARM_FP16) && CV_FP16
+                                     if (useFP16)
+                                     {
+                                         opt_NEON::convBlock_FP16(c1 - c0, wptr, inptr, (char *)cptr_f16, ldc, c0 == 0, outLen, CONV_MR, CONV_NR);
+@@ -1567,7 +1567,7 @@ void runFastConv(InputArray _input, OutputArray _output, const Ptr<FastConv>& co
+                         float biasval = biasptr[k];
+                         int j = 0;
+ 
+-#ifdef CONV_ARM_FP16
++#if defined(CONV_ARM_FP16) && CV_FP16
+                         if (useFP16)
+                         {
+                             float32x4_t vbias = vdupq_n_f32(biasval);
diff --git a/media-libs/opencv/opencv-4.8.0-r1.ebuild b/media-libs/opencv/opencv-4.8.0-r1.ebuild
index 846e57c7514b..27cec3eb3fa4 100644
--- a/media-libs/opencv/opencv-4.8.0-r1.ebuild
+++ b/media-libs/opencv/opencv-4.8.0-r1.ebuild
@@ -294,6 +294,7 @@ PATCHES=(
 	"${FILESDIR}"/${PN}-4.5.0-link-with-cblas-for-lapack.patch
 	"${FILESDIR}"/${PN}-4.8.0-fix-protobuf.patch
 	"${FILESDIR}"/${PN}-4.8.0-fix-flatbuffer.patch
+	"${FILESDIR}"/${PN}-4.8.0-arm64-fp16.patch
 )
 
 pkg_pretend() {
-- 
cgit v1.2.3