summaryrefslogtreecommitdiff
path: root/sci-libs/openblas/files
diff options
context:
space:
mode:
authorV3n3RiX <venerix@koprulu.sector>2022-11-23 01:07:24 +0000
committerV3n3RiX <venerix@koprulu.sector>2022-11-23 01:07:24 +0000
commit57c2e006d0af9ab77ced676461d7100cade4a716 (patch)
treefedd61a96505af97dea0e78425b1b7a70b813a46 /sci-libs/openblas/files
parent664924d8659185e3b43f99ee8a0b88177efb93dc (diff)
gentoo auto-resync : 23:11:2022 - 01:07:24
Diffstat (limited to 'sci-libs/openblas/files')
-rw-r--r--sci-libs/openblas/files/openblas-0.3.21-clang16.patch581
1 files changed, 581 insertions, 0 deletions
diff --git a/sci-libs/openblas/files/openblas-0.3.21-clang16.patch b/sci-libs/openblas/files/openblas-0.3.21-clang16.patch
new file mode 100644
index 000000000000..051966b0a870
--- /dev/null
+++ b/sci-libs/openblas/files/openblas-0.3.21-clang16.patch
@@ -0,0 +1,581 @@
+https://github.com/xianyi/OpenBLAS/commit/f703846ad9400a8ea175cb8dd43e18c152aeab93
+https://github.com/xianyi/OpenBLAS/commit/515cf269291bec0d43651fe7bf99a71fb074a0ad
+https://github.com/xianyi/OpenBLAS/commit/91110f92d218492d0efbdc1fdf34277ca45f4b36
+https://github.com/xianyi/OpenBLAS/commit/9402df5604e69f86f58953e3883f33f98c930baf
+https://github.com/xianyi/OpenBLAS/commit/101a2c77c3f3610933f450cefca3e312edab2186
+https://src.fedoraproject.org/rpms/openblas/c/5f27d51cebe1c1bb6598d38326ece8dc0ac71ec7?branch=rawhide
+
+From f703846ad9400a8ea175cb8dd43e18c152aeab93 Mon Sep 17 00:00:00 2001
+From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
+Date: Sat, 13 Aug 2022 11:38:27 +0200
+Subject: [PATCH] Add function prototypes
+
+--- a/exports/gensymbol
++++ b/exports/gensymbol
+@@ -4000,6 +4000,22 @@ case "$p1" in
+ no_underscore_objs="$no_underscore_objs $misc_common_objs"
+
+ printf 'int main(void){\n'
++ for obj in $underscore_objs; do
++ [ "$obj" != "xerbla" ] && printf 'extern void %s%s%s%s();\n' \
++ "$symbolprefix" "$obj" "$bu" "$symbolsuffix"
++ done
++
++ for obj in $need_2underscore_objs; do
++ printf 'extern void %s%s%s%s%s();\n' \
++ "$symbolprefix" "$obj" "$bu" "$bu" "$symbolsuffix"
++ done
++
++ for obj in $no_underscore_objs; do
++ printf 'extern void %s%s%s();\n' \
++ "$symbolprefix" "$obj" "$symbolsuffix"
++ done
++
++ printf '\n'
+ for obj in $underscore_objs; do
+ [ "$obj" != "xerbla" ] && printf '%s%s%s%s();\n' \
+ "$symbolprefix" "$obj" "$bu" "$symbolsuffix"
+--- a/exports/gensymbol.pl
++++ b/exports/gensymbol.pl
+@@ -3955,6 +3955,18 @@
+ @no_underscore_objs = (@no_underscore_objs, @misc_common_objs);
+
+ print "int main(void){\n";
++ foreach $objs (@underscore_objs) {
++ print "extern void ", $symbolprefix, $objs, $bu, $symbolsuffix, "();\n" if $objs ne "xerbla";
++ }
++
++ foreach $objs (@need_2underscore_objs) {
++ print "extern void ", $symbolprefix, $objs, $bu, $bu, $symbolsuffix, "();\n";
++ }
++
++ foreach $objs (@no_underscore_objs) {
++ print "extern void ", $symbolprefix, $objs, $symbolsuffix, "();\n";
++ }
++
+ foreach $objs (@underscore_objs) {
+ print $symbolprefix, $objs, $bu, $symbolsuffix, "();\n" if $objs ne "xerbla";
+ }
+
+From 515cf269291bec0d43651fe7bf99a71fb074a0ad Mon Sep 17 00:00:00 2001
+From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
+Date: Wed, 14 Sep 2022 11:48:36 +0200
+Subject: [PATCH] Fix pointer/integer argument mismatch in calls to pow()
+
+--- a/lapack-netlib/SRC/claed0.c
++++ b/lapack-netlib/SRC/claed0.c
+@@ -796,10 +796,10 @@ f"> */
+
+ temp = log((real) (*n)) / log(2.f);
+ lgn = (integer) temp;
+- if (pow_ii(&c__2, &lgn) < *n) {
++ if (pow_ii(c__2, lgn) < *n) {
+ ++lgn;
+ }
+- if (pow_ii(&c__2, &lgn) < *n) {
++ if (pow_ii(c__2, lgn) < *n) {
+ ++lgn;
+ }
+ iprmpt = indxq + *n + 1;
+--- a/lapack-netlib/SRC/claed7.c
++++ b/lapack-netlib/SRC/claed7.c
+@@ -864,11 +864,11 @@ f"> */
+ /* Form the z-vector which consists of the last row of Q_1 and the */
+ /* first row of Q_2. */
+
+- ptr = pow_ii(&c__2, tlvls) + 1;
++ ptr = pow_ii(c__2, *tlvls) + 1;
+ i__1 = *curlvl - 1;
+ for (i__ = 1; i__ <= i__1; ++i__) {
+ i__2 = *tlvls - i__;
+- ptr += pow_ii(&c__2, &i__2);
++ ptr += pow_ii(c__2, i__2);
+ /* L10: */
+ }
+ curr = ptr + *curpbm;
+--- a/lapack-netlib/SRC/clalsa.c
++++ b/lapack-netlib/SRC/clalsa.c
+@@ -1051,7 +1051,7 @@ f"> */
+ /* Finally go through the left singular vector matrices of all */
+ /* the other subproblems bottom-up on the tree. */
+
+- j = pow_ii(&c__2, &nlvl);
++ j = pow_ii(c__2, nlvl);
+ sqre = 0;
+
+ for (lvl = nlvl; lvl >= 1; --lvl) {
+@@ -1065,7 +1065,7 @@ f"> */
+ ll = 1;
+ } else {
+ i__1 = lvl - 1;
+- lf = pow_ii(&c__2, &i__1);
++ lf = pow_ii(c__2, i__1);
+ ll = (lf << 1) - 1;
+ }
+ i__1 = ll;
+@@ -1110,7 +1110,7 @@ f"> */
+ ll = 1;
+ } else {
+ i__2 = lvl - 1;
+- lf = pow_ii(&c__2, &i__2);
++ lf = pow_ii(c__2, i__2);
+ ll = (lf << 1) - 1;
+ }
+ i__2 = lf;
+--- a/lapack-netlib/SRC/cstedc.c
++++ b/lapack-netlib/SRC/cstedc.c
+@@ -836,10 +836,10 @@ f"> */
+ lrwmin = *n - 1 << 1;
+ } else if (icompz == 1) {
+ lgn = (integer) (log((real) (*n)) / log(2.f));
+- if (pow_ii(&c__2, &lgn) < *n) {
++ if (pow_ii(c__2, lgn) < *n) {
+ ++lgn;
+ }
+- if (pow_ii(&c__2, &lgn) < *n) {
++ if (pow_ii(c__2, lgn) < *n) {
+ ++lgn;
+ }
+ lwmin = *n * *n;
+--- a/lapack-netlib/SRC/dlaed0.c
++++ b/lapack-netlib/SRC/dlaed0.c
+@@ -827,10 +827,10 @@ f"> */
+
+ temp = log((doublereal) (*n)) / log(2.);
+ lgn = (integer) temp;
+- if (pow_ii(&c__2, &lgn) < *n) {
++ if (pow_ii(c__2, lgn) < *n) {
+ ++lgn;
+ }
+- if (pow_ii(&c__2, &lgn) < *n) {
++ if (pow_ii(c__2, lgn) < *n) {
+ ++lgn;
+ }
+ iprmpt = indxq + *n + 1;
+--- a/lapack-netlib/SRC/dlaed7.c
++++ b/lapack-netlib/SRC/dlaed7.c
+@@ -885,11 +885,11 @@ f"> */
+ /* Form the z-vector which consists of the last row of Q_1 and the */
+ /* first row of Q_2. */
+
+- ptr = pow_ii(&c__2, tlvls) + 1;
++ ptr = pow_ii(c__2, *tlvls) + 1;
+ i__1 = *curlvl - 1;
+ for (i__ = 1; i__ <= i__1; ++i__) {
+ i__2 = *tlvls - i__;
+- ptr += pow_ii(&c__2, &i__2);
++ ptr += pow_ii(c__2, i__2);
+ /* L10: */
+ }
+ curr = ptr + *curpbm;
+--- a/lapack-netlib/SRC/dlaeda.c
++++ b/lapack-netlib/SRC/dlaeda.c
+@@ -754,7 +754,7 @@ f"> */
+ /* scheme */
+
+ i__1 = *curlvl - 1;
+- curr = ptr + *curpbm * pow_ii(&c__2, curlvl) + pow_ii(&c__2, &i__1) - 1;
++ curr = ptr + *curpbm * pow_ii(c__2, *curlvl) + pow_ii(c__2, i__1) - 1;
+
+ /* Determine size of these matrices. We add HALF to the value of */
+ /* the SQRT in case the machine underestimates one of these square */
+@@ -781,12 +781,12 @@ f"> */
+ /* rotations and permutation and then multiplying the center matrices */
+ /* against the current Z. */
+
+- ptr = pow_ii(&c__2, tlvls) + 1;
++ ptr = pow_ii(c__2, *tlvls) + 1;
+ i__1 = *curlvl - 1;
+ for (k = 1; k <= i__1; ++k) {
+ i__2 = *curlvl - k;
+ i__3 = *curlvl - k - 1;
+- curr = ptr + *curpbm * pow_ii(&c__2, &i__2) + pow_ii(&c__2, &i__3) -
++ curr = ptr + *curpbm * pow_ii(c__2, i__2) + pow_ii(c__2, i__3) -
+ 1;
+ psiz1 = prmptr[curr + 1] - prmptr[curr];
+ psiz2 = prmptr[curr + 2] - prmptr[curr + 1];
+@@ -847,7 +847,7 @@ f"> */
+ c__1);
+
+ i__2 = *tlvls - k;
+- ptr += pow_ii(&c__2, &i__2);
++ ptr += pow_ii(c__2, i__2);
+ /* L70: */
+ }
+
+--- a/lapack-netlib/SRC/dlalsa.c
++++ b/lapack-netlib/SRC/dlalsa.c
+@@ -951,7 +951,7 @@ f"> */
+ /* Finally go through the left singular vector matrices of all */
+ /* the other subproblems bottom-up on the tree. */
+
+- j = pow_ii(&c__2, &nlvl);
++ j = pow_ii(c__2, nlvl);
+ sqre = 0;
+
+ for (lvl = nlvl; lvl >= 1; --lvl) {
+@@ -965,7 +965,7 @@ f"> */
+ ll = 1;
+ } else {
+ i__1 = lvl - 1;
+- lf = pow_ii(&c__2, &i__1);
++ lf = pow_ii(c__2, i__1);
+ ll = (lf << 1) - 1;
+ }
+ i__1 = ll;
+@@ -1010,7 +1010,7 @@ f"> */
+ ll = 1;
+ } else {
+ i__2 = lvl - 1;
+- lf = pow_ii(&c__2, &i__2);
++ lf = pow_ii(c__2, i__2);
+ ll = (lf << 1) - 1;
+ }
+ i__2 = lf;
+--- a/lapack-netlib/SRC/dlasd0.c
++++ b/lapack-netlib/SRC/dlasd0.c
+@@ -824,7 +824,7 @@ f"> */
+ ll = 1;
+ } else {
+ i__1 = lvl - 1;
+- lf = pow_ii(&c__2, &i__1);
++ lf = pow_ii(c__2, i__1);
+ ll = (lf << 1) - 1;
+ }
+ i__1 = ll;
+--- a/lapack-netlib/SRC/dlasda.c
++++ b/lapack-netlib/SRC/dlasda.c
+@@ -1027,7 +1027,7 @@ f"> */
+
+ /* Now conquer each subproblem bottom-up. */
+
+- j = pow_ii(&c__2, &nlvl);
++ j = pow_ii(c__2, nlvl);
+ for (lvl = nlvl; lvl >= 1; --lvl) {
+ lvl2 = (lvl << 1) - 1;
+
+@@ -1039,7 +1039,7 @@ f"> */
+ ll = 1;
+ } else {
+ i__1 = lvl - 1;
+- lf = pow_ii(&c__2, &i__1);
++ lf = pow_ii(c__2, i__1);
+ ll = (lf << 1) - 1;
+ }
+ i__1 = ll;
+--- a/lapack-netlib/SRC/dstedc.c
++++ b/lapack-netlib/SRC/dstedc.c
+@@ -806,10 +806,10 @@ f"> */
+ lwmin = *n - 1 << 1;
+ } else {
+ lgn = (integer) (log((doublereal) (*n)) / log(2.));
+- if (pow_ii(&c__2, &lgn) < *n) {
++ if (pow_ii(c__2, lgn) < *n) {
+ ++lgn;
+ }
+- if (pow_ii(&c__2, &lgn) < *n) {
++ if (pow_ii(c__2, lgn) < *n) {
+ ++lgn;
+ }
+ if (icompz == 1) {
+--- a/lapack-netlib/SRC/slaed0.c
++++ b/lapack-netlib/SRC/slaed0.c
+@@ -823,10 +823,10 @@ f"> */
+
+ temp = log((real) (*n)) / log(2.f);
+ lgn = (integer) temp;
+- if (pow_ii(&c__2, &lgn) < *n) {
++ if (pow_ii(c__2, lgn) < *n) {
+ ++lgn;
+ }
+- if (pow_ii(&c__2, &lgn) < *n) {
++ if (pow_ii(c__2, lgn) < *n) {
+ ++lgn;
+ }
+ iprmpt = indxq + *n + 1;
+--- a/lapack-netlib/SRC/slaed7.c
++++ b/lapack-netlib/SRC/slaed7.c
+@@ -883,11 +883,11 @@ f"> */
+ /* Form the z-vector which consists of the last row of Q_1 and the */
+ /* first row of Q_2. */
+
+- ptr = pow_ii(&c__2, tlvls) + 1;
++ ptr = pow_ii(c__2, *tlvls) + 1;
+ i__1 = *curlvl - 1;
+ for (i__ = 1; i__ <= i__1; ++i__) {
+ i__2 = *tlvls - i__;
+- ptr += pow_ii(&c__2, &i__2);
++ ptr += pow_ii(c__2, i__2);
+ /* L10: */
+ }
+ curr = ptr + *curpbm;
+--- a/lapack-netlib/SRC/slaeda.c
++++ b/lapack-netlib/SRC/slaeda.c
+@@ -753,7 +753,7 @@ f"> */
+ /* scheme */
+
+ i__1 = *curlvl - 1;
+- curr = ptr + *curpbm * pow_ii(&c__2, curlvl) + pow_ii(&c__2, &i__1) - 1;
++ curr = ptr + *curpbm * pow_ii(c__2, *curlvl) + pow_ii(c__2, i__1) - 1;
+
+ /* Determine size of these matrices. We add HALF to the value of */
+ /* the SQRT in case the machine underestimates one of these square */
+@@ -779,12 +779,12 @@ f"> */
+ /* rotations and permutation and then multiplying the center matrices */
+ /* against the current Z. */
+
+- ptr = pow_ii(&c__2, tlvls) + 1;
++ ptr = pow_ii(c__2, *tlvls) + 1;
+ i__1 = *curlvl - 1;
+ for (k = 1; k <= i__1; ++k) {
+ i__2 = *curlvl - k;
+ i__3 = *curlvl - k - 1;
+- curr = ptr + *curpbm * pow_ii(&c__2, &i__2) + pow_ii(&c__2, &i__3) -
++ curr = ptr + *curpbm * pow_ii(c__2, i__2) + pow_ii(c__2, i__3) -
+ 1;
+ psiz1 = prmptr[curr + 1] - prmptr[curr];
+ psiz2 = prmptr[curr + 2] - prmptr[curr + 1];
+@@ -844,7 +844,7 @@ f"> */
+ c__1);
+
+ i__2 = *tlvls - k;
+- ptr += pow_ii(&c__2, &i__2);
++ ptr += pow_ii(c__2, i__2);
+ /* L70: */
+ }
+
+--- a/lapack-netlib/SRC/slalsa.c
++++ b/lapack-netlib/SRC/slalsa.c
+@@ -946,7 +946,7 @@ f"> */
+ /* Finally go through the left singular vector matrices of all */
+ /* the other subproblems bottom-up on the tree. */
+
+- j = pow_ii(&c__2, &nlvl);
++ j = pow_ii(c__2, nlvl);
+ sqre = 0;
+
+ for (lvl = nlvl; lvl >= 1; --lvl) {
+@@ -960,7 +960,7 @@ f"> */
+ ll = 1;
+ } else {
+ i__1 = lvl - 1;
+- lf = pow_ii(&c__2, &i__1);
++ lf = pow_ii(c__2, i__1);
+ ll = (lf << 1) - 1;
+ }
+ i__1 = ll;
+@@ -1005,7 +1005,7 @@ f"> */
+ ll = 1;
+ } else {
+ i__2 = lvl - 1;
+- lf = pow_ii(&c__2, &i__2);
++ lf = pow_ii(c__2, i__2);
+ ll = (lf << 1) - 1;
+ }
+ i__2 = lf;
+--- a/lapack-netlib/SRC/slasd0.c
++++ b/lapack-netlib/SRC/slasd0.c
+@@ -821,7 +821,7 @@ f"> */
+ ll = 1;
+ } else {
+ i__1 = lvl - 1;
+- lf = pow_ii(&c__2, &i__1);
++ lf = pow_ii(c__2, i__1);
+ ll = (lf << 1) - 1;
+ }
+ i__1 = ll;
+--- a/lapack-netlib/SRC/slasda.c
++++ b/lapack-netlib/SRC/slasda.c
+@@ -1023,7 +1023,7 @@ f"> */
+
+ /* Now conquer each subproblem bottom-up. */
+
+- j = pow_ii(&c__2, &nlvl);
++ j = pow_ii(c__2, nlvl);
+ for (lvl = nlvl; lvl >= 1; --lvl) {
+ lvl2 = (lvl << 1) - 1;
+
+@@ -1035,7 +1035,7 @@ f"> */
+ ll = 1;
+ } else {
+ i__1 = lvl - 1;
+- lf = pow_ii(&c__2, &i__1);
++ lf = pow_ii(c__2, i__1);
+ ll = (lf << 1) - 1;
+ }
+ i__1 = ll;
+--- a/lapack-netlib/SRC/sstedc.c
++++ b/lapack-netlib/SRC/sstedc.c
+@@ -804,10 +804,10 @@ f"> */
+ lwmin = *n - 1 << 1;
+ } else {
+ lgn = (integer) (log((real) (*n)) / log(2.f));
+- if (pow_ii(&c__2, &lgn) < *n) {
++ if (pow_ii(c__2, lgn) < *n) {
+ ++lgn;
+ }
+- if (pow_ii(&c__2, &lgn) < *n) {
++ if (pow_ii(c__2, lgn) < *n) {
+ ++lgn;
+ }
+ if (icompz == 1) {
+--- a/lapack-netlib/SRC/zlaed0.c
++++ b/lapack-netlib/SRC/zlaed0.c
+@@ -793,10 +793,10 @@ f"> */
+
+ temp = log((doublereal) (*n)) / log(2.);
+ lgn = (integer) temp;
+- if (pow_ii(&c__2, &lgn) < *n) {
++ if (pow_ii(c__2, lgn) < *n) {
+ ++lgn;
+ }
+- if (pow_ii(&c__2, &lgn) < *n) {
++ if (pow_ii(c__2, lgn) < *n) {
+ ++lgn;
+ }
+ iprmpt = indxq + *n + 1;
+--- a/lapack-netlib/SRC/zlaed7.c
++++ b/lapack-netlib/SRC/zlaed7.c
+@@ -864,11 +864,11 @@ f"> */
+ /* Form the z-vector which consists of the last row of Q_1 and the */
+ /* first row of Q_2. */
+
+- ptr = pow_ii(&c__2, tlvls) + 1;
++ ptr = pow_ii(c__2, *tlvls) + 1;
+ i__1 = *curlvl - 1;
+ for (i__ = 1; i__ <= i__1; ++i__) {
+ i__2 = *tlvls - i__;
+- ptr += pow_ii(&c__2, &i__2);
++ ptr += pow_ii(c__2, i__2);
+ /* L10: */
+ }
+ curr = ptr + *curpbm;
+--- a/lapack-netlib/SRC/zlalsa.c
++++ b/lapack-netlib/SRC/zlalsa.c
+@@ -1051,7 +1051,7 @@ f"> */
+ /* Finally go through the left singular vector matrices of all */
+ /* the other subproblems bottom-up on the tree. */
+
+- j = pow_ii(&c__2, &nlvl);
++ j = pow_ii(c__2, nlvl);
+ sqre = 0;
+
+ for (lvl = nlvl; lvl >= 1; --lvl) {
+@@ -1065,7 +1065,7 @@ f"> */
+ ll = 1;
+ } else {
+ i__1 = lvl - 1;
+- lf = pow_ii(&c__2, &i__1);
++ lf = pow_ii(c__2, i__1);
+ ll = (lf << 1) - 1;
+ }
+ i__1 = ll;
+@@ -1110,7 +1110,7 @@ f"> */
+ ll = 1;
+ } else {
+ i__2 = lvl - 1;
+- lf = pow_ii(&c__2, &i__2);
++ lf = pow_ii(c__2, i__2);
+ ll = (lf << 1) - 1;
+ }
+ i__2 = lf;
+--- a/lapack-netlib/SRC/zstedc.c
++++ b/lapack-netlib/SRC/zstedc.c
+@@ -836,10 +836,10 @@ f"> */
+ lrwmin = *n - 1 << 1;
+ } else if (icompz == 1) {
+ lgn = (integer) (log((doublereal) (*n)) / log(2.));
+- if (pow_ii(&c__2, &lgn) < *n) {
++ if (pow_ii(c__2, lgn) < *n) {
+ ++lgn;
+ }
+- if (pow_ii(&c__2, &lgn) < *n) {
++ if (pow_ii(c__2, lgn) < *n) {
+ ++lgn;
+ }
+ lwmin = *n * *n;
+
+From 91110f92d218492d0efbdc1fdf34277ca45f4b36 Mon Sep 17 00:00:00 2001
+From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
+Date: Wed, 14 Sep 2022 14:03:31 +0200
+Subject: [PATCH] fix missing return type in function declaration
+
+--- a/ctest/c_sblat1c.c
++++ b/ctest/c_sblat1c.c
+@@ -969,7 +969,7 @@ real *sfac;
+ 1.17 };
+
+ /* Local variables */
+- extern /* Subroutine */ srottest_();
++ extern /* Subroutine */ void srottest_();
+ static integer i__, k, ksize;
+ extern /* Subroutine */ int stest_(), srotmtest_();
+ static integer ki, kn;
+
+From 9402df5604e69f86f58953e3883f33f98c930baf Mon Sep 17 00:00:00 2001
+From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
+Date: Wed, 14 Sep 2022 21:44:34 +0200
+Subject: [PATCH] Fix missing external declaration
+
+--- a/driver/others/blas_server_omp.c
++++ b/driver/others/blas_server_omp.c
+@@ -69,6 +69,8 @@
+
+ int blas_server_avail = 0;
+
++extern int openblas_omp_adaptive_env();
++
+ static void * blas_thread_buffer[MAX_PARALLEL_NUMBER][MAX_CPU_NUMBER];
+ #ifdef HAVE_C11
+ static atomic_bool blas_buffer_inuse[MAX_PARALLEL_NUMBER];
+
+From 101a2c77c3f3610933f450cefca3e312edab2186 Mon Sep 17 00:00:00 2001
+From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
+Date: Thu, 15 Sep 2022 09:19:19 +0200
+Subject: [PATCH] Fix warnings
+
+--- a/kernel/x86_64/dgemm_ncopy_8_skylakex.c
++++ b/kernel/x86_64/dgemm_ncopy_8_skylakex.c
+@@ -52,18 +52,18 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT * __restrict a, BLASLONG lda, FLOAT * __
+ FLOAT ctemp05, ctemp06, ctemp07, ctemp08;
+ FLOAT ctemp09, ctemp10, ctemp11, ctemp12;
+ FLOAT ctemp13, ctemp14, ctemp15, ctemp16;
+- FLOAT ctemp17, ctemp18, ctemp19, ctemp20;
+- FLOAT ctemp21, ctemp22, ctemp23, ctemp24;
+- FLOAT ctemp25, ctemp26, ctemp27, ctemp28;
+- FLOAT ctemp29, ctemp30, ctemp31, ctemp32;
+- FLOAT ctemp33, ctemp34, ctemp35, ctemp36;
+- FLOAT ctemp37, ctemp38, ctemp39, ctemp40;
+- FLOAT ctemp41, ctemp42, ctemp43, ctemp44;
+- FLOAT ctemp45, ctemp46, ctemp47, ctemp48;
+- FLOAT ctemp49, ctemp50, ctemp51, ctemp52;
+- FLOAT ctemp53, ctemp54, ctemp55, ctemp56;
+- FLOAT ctemp57, ctemp58, ctemp59, ctemp60;
+- FLOAT ctemp61, ctemp62, ctemp63, ctemp64;
++ FLOAT ctemp17 /*, ctemp18, ctemp19, ctemp20*/ ;
++ FLOAT /*ctemp21, ctemp22,*/ ctemp23, ctemp24;
++ FLOAT ctemp25 /*, ctemp26, ctemp27, ctemp28*/ ;
++ FLOAT /*ctemp29, ctemp30,*/ ctemp31, ctemp32;
++ FLOAT ctemp33 /*, ctemp34, ctemp35, ctemp36*/ ;
++ FLOAT /*ctemp37, ctemp38,*/ ctemp39, ctemp40;
++ FLOAT ctemp41 /*, ctemp42, ctemp43, ctemp44*/ ;
++ FLOAT /*ctemp45, ctemp46,*/ ctemp47, ctemp48;
++ FLOAT ctemp49 /*, ctemp50, ctemp51, ctemp52*/ ;
++ FLOAT /*ctemp53, ctemp54,*/ ctemp55, ctemp56;
++ FLOAT ctemp57 /*, ctemp58, ctemp59, ctemp60*/ ;
++ FLOAT /*ctemp61, ctemp62,*/ ctemp63, ctemp64;
+
+
+ aoffset = a;
+--- a/kernel/x86_64/omatcopy_rt.c
++++ b/kernel/x86_64/omatcopy_rt.c
+@@ -142,7 +142,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ ,"xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7","xmm8","xmm9","xmm10","xmm11","xmm12","xmm13","xmm14","xmm15");\
+ }
+ int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG ldb){
+- float *src, *dst, *dst_tmp, *src_base, *dst_base;
++ float *src, *dst, *dst_tmp=0, *src_base, *dst_base;
+ uint64_t src_ld_bytes = (uint64_t)lda * sizeof(float), dst_ld_bytes = (uint64_t)ldb * sizeof(float), num_rows = 0;
+ BLASLONG cols_left, rows_done; float ALPHA = alpha;
+ if(ALPHA==0.0){
+