summary refs log tree commit diff
path: root/x11-apps/igt-gpu-tools/files/1.28-lib-Inline-igt_x86_features-into-ifunc-resolvers.patch
diff options
context:
space:
mode:
Diffstat (limited to 'x11-apps/igt-gpu-tools/files/1.28-lib-Inline-igt_x86_features-into-ifunc-resolvers.patch')
-rw-r--r-- x11-apps/igt-gpu-tools/files/1.28-lib-Inline-igt_x86_features-into-ifunc-resolvers.patch 354
1 files changed, 354 insertions, 0 deletions
diff --git a/x11-apps/igt-gpu-tools/files/1.28-lib-Inline-igt_x86_features-into-ifunc-resolvers.patch b/x11-apps/igt-gpu-tools/files/1.28-lib-Inline-igt_x86_features-into-ifunc-resolvers.patch
new file mode 100644
index 000000000000..e1239ff11a2c
--- /dev/null
+++ b/x11-apps/igt-gpu-tools/files/1.28-lib-Inline-igt_x86_features-into-ifunc-resolvers.patch
@@ -0,0 +1,354 @@
+From cdf2915c810e77e1b77071e46acd6d9c696c4473 Mon Sep 17 00:00:00 2001
+From: Matt Turner <mattst88@gmail.com>
+Date: Thu, 21 Mar 2024 14:41:50 -0400
+Subject: [PATCH] lib: Inline igt_x86_features() into ifunc resolvers
+
+Quoting https://sourceware.org/glibc/wiki/GNU_IFUNC
+
+> When LD_BIND_NOW=1 or -Wl,z,now is in effect symbols must be
+> immediately resolved at startup. In cases where an external function
+> call depends needs to be made that may fail if such a call has not
+> been initialized yet (PLT-based relocation which is processed later).
+> For example calling strlen in an IFUNC resolver built with -Wl,z,now
+> may lead to a segfault because the PLT is not yet resolved.
+
+We cannot rely on function calls through the PLT in ifunc resolvers as
+the PLT may not have been initialized yet.
+
+In practice, this causes crashes when igt is linked with -Wl,-z,now or
+when linked with the mold linker.
+
+To avoid this problem, we do two things:
+ 1. move igt_x86_features() to igt_x86.h so its definition is
+ available to compilation units that call the function.
+ 2. mark the ifunc resolvers with __attribute__((flatten)) to ensure
+ igt_x86_features() is inlined. Since this function is only called
+ from a few places it does not significantly increase binary size
+ to inline it.
+
+Linux distros (at least Fedora since v23, Gentoo/Hardened, soon standard
+Gentoo) use `-Wl,-z,now` to improve security. By binding upfront, the
+loader can mark the GOT as read-only for a security enhancement. See
+https://wiki.gentoo.org/wiki/Hardened/Toolchain for more details.
+
+[Fixed whitespaces (Kamil)]
+Bug: https://bugs.gentoo.org/788625
+Bug: https://bugs.gentoo.org/925348
+Reviewed-by: Zbigniew Kempczyński <zbigniew.kempczynski@intel.com>
+Signed-off-by: Matt Turner <mattst88@gmail.com>
+---
+ lib/igt_halffloat.c | 8 +++
+ lib/igt_x86.c | 119 ++------------------------------------------
+ lib/igt_x86.h | 118 ++++++++++++++++++++++++++++++++++++++++++-
+ 3 files changed, 129 insertions(+), 116 deletions(-)
+
+diff --git a/lib/igt_halffloat.c b/lib/igt_halffloat.c
+index 5dbe08e01..560952d20 100644
+--- a/lib/igt_halffloat.c
++++ b/lib/igt_halffloat.c
+@@ -194,6 +194,10 @@ static void half_to_float(const uint16_t *h, float *f, unsigned int num)
+ f[i] = _half_to_float(h[i]);
+ }
+
++/* The PLT is not initialized when ifunc resolvers run, so all external
++ * functions must be inlined with __attribute__((flatten)).
++ */
++__attribute__((flatten))
+ static void (*resolve_float_to_half(void))(const float *f, uint16_t *h, unsigned int num)
+ {
+ if (igt_x86_features() & F16C)
+@@ -205,6 +209,10 @@ static void (*resolve_float_to_half(void))(const float *f, uint16_t *h, unsigned
+ void igt_float_to_half(const float *f, uint16_t *h, unsigned int num)
+ __attribute__((ifunc("resolve_float_to_half")));
+
++/* The PLT is not initialized when ifunc resolvers run, so all external
++ * functions must be inlined with __attribute__((flatten)).
++ */
++__attribute__((flatten))
+ static void (*resolve_half_to_float(void))(const uint16_t *h, float *f, unsigned int num)
+ {
+ if (igt_x86_features() & F16C)
+diff --git a/lib/igt_x86.c b/lib/igt_x86.c
+index 8c102fd13..4842164e9 100644
+--- a/lib/igt_x86.c
++++ b/lib/igt_x86.c
+@@ -27,14 +27,6 @@
+
+ #include "config.h"
+
+-#ifdef HAVE_CPUID_H
+-#include <cpuid.h>
+-#else
+-#define __get_cpuid_max(x, y) 0
+-#define __cpuid(level, a, b, c, d) a = b = c = d = 0
+-#define __cpuid_count(level, count, a, b, c, d) a = b = c = d = 0
+-#endif
+-
+ #include "igt_x86.h"
+ #include "igt_aux.h"
+
+@@ -49,114 +41,7 @@
+ * @include: igt_x86.h
+ */
+
+-#define BASIC_CPUID 0x0
+-#define EXTENDED_CPUID 0x80000000
+-
+-#ifndef bit_MMX
+-#define bit_MMX (1 << 23)
+-#endif
+-
+-#ifndef bit_SSE
+-#define bit_SSE (1 << 25)
+-#endif
+-
+-#ifndef bit_SSE2
+-#define bit_SSE2 (1 << 26)
+-#endif
+-
+-#ifndef bit_SSE3
+-#define bit_SSE3 (1 << 0)
+-#endif
+-
+-#ifndef bit_SSSE3
+-#define bit_SSSE3 (1 << 9)
+-#endif
+-
+-#ifndef bit_SSE4_1
+-#define bit_SSE4_1 (1 << 19)
+-#endif
+-
+-#ifndef bit_SSE4_2
+-#define bit_SSE4_2 (1 << 20)
+-#endif
+-
+-#ifndef bit_OSXSAVE
+-#define bit_OSXSAVE (1 << 27)
+-#endif
+-
+-#ifndef bit_AVX
+-#define bit_AVX (1 << 28)
+-#endif
+-
+-#ifndef bit_F16C
+-#define bit_F16C (1 << 29)
+-#endif
+-
+-#ifndef bit_AVX2
+-#define bit_AVX2 (1<<5)
+-#endif
+-
+-#define xgetbv(index,eax,edx) \
+- __asm__ ("xgetbv" : "=a"(eax), "=d"(edx) : "c" (index))
+-
+-#define has_YMM 0x1
+-
+ #if defined(__x86_64__) || defined(__i386__)
+-unsigned igt_x86_features(void)
+-{
+- unsigned max = __get_cpuid_max(BASIC_CPUID, 0);
+- unsigned eax, ebx, ecx, edx;
+- unsigned features = 0;
+- unsigned extra = 0;
+-
+- if (max >= 1) {
+- __cpuid(1, eax, ebx, ecx, edx);
+-
+- if (ecx & bit_SSE3)
+- features |= SSE3;
+-
+- if (ecx & bit_SSSE3)
+- features |= SSSE3;
+-
+- if (ecx & bit_SSE4_1)
+- features |= SSE4_1;
+-
+- if (ecx & bit_SSE4_2)
+- features |= SSE4_2;
+-
+- if (ecx & bit_OSXSAVE) {
+- unsigned int bv_eax, bv_ecx;
+- xgetbv(0, bv_eax, bv_ecx);
+- if ((bv_eax & 6) == 6)
+- extra |= has_YMM;
+- }
+-
+- if ((extra & has_YMM) && (ecx & bit_AVX))
+- features |= AVX;
+-
+- if (edx & bit_MMX)
+- features |= MMX;
+-
+- if (edx & bit_SSE)
+- features |= SSE;
+-
+- if (edx & bit_SSE2)
+- features |= SSE2;
+-
+- if (ecx & bit_F16C)
+- features |= F16C;
+- }
+-
+- if (max >= 7) {
+- __cpuid_count(7, 0, eax, ebx, ecx, edx);
+-
+- if ((extra & has_YMM) && (ebx & bit_AVX2))
+- features |= AVX2;
+- }
+-
+- return features;
+-}
+-
+ char *igt_x86_features_to_string(unsigned features, char *line)
+ {
+ char *ret = line;
+@@ -284,6 +169,10 @@ static void memcpy_from_wc(void *dst, const void *src, unsigned long len)
+ memcpy(dst, src, len);
+ }
+
++/* The PLT is not initialized when ifunc resolvers run, so all external
++ * functions must be inlined with __attribute__((flatten)).
++ */
++__attribute__((flatten))
+ static void (*resolve_memcpy_from_wc(void))(void *, const void *, unsigned long)
+ {
+ if (igt_x86_features() & SSE4_1)
+diff --git a/lib/igt_x86.h b/lib/igt_x86.h
+index c7b84dec2..f1df4c1a9 100644
+--- a/lib/igt_x86.h
++++ b/lib/igt_x86.h
+@@ -30,6 +30,14 @@
+ #ifndef IGT_X86_H
+ #define IGT_X86_H
+
++#ifdef HAVE_CPUID_H
++#include <cpuid.h>
++#else
++#define __get_cpuid_max(x, y) 0
++#define __cpuid(level, a, b, c, d) a = b = c = d = 0
++#define __cpuid_count(level, count, a, b, c, d) a = b = c = d = 0
++#endif
++
+ #define MMX 0x1
+ #define SSE 0x2
+ #define SSE2 0x4
+@@ -42,7 +50,115 @@
+ #define F16C 0x200
+
+ #if defined(__x86_64__) || defined(__i386__)
+-unsigned igt_x86_features(void);
++
++#define BASIC_CPUID 0x0
++#define EXTENDED_CPUID 0x80000000
++
++#ifndef bit_MMX
++#define bit_MMX (1 << 23)
++#endif
++
++#ifndef bit_SSE
++#define bit_SSE (1 << 25)
++#endif
++
++#ifndef bit_SSE2
++#define bit_SSE2 (1 << 26)
++#endif
++
++#ifndef bit_SSE3
++#define bit_SSE3 (1 << 0)
++#endif
++
++#ifndef bit_SSSE3
++#define bit_SSSE3 (1 << 9)
++#endif
++
++#ifndef bit_SSE4_1
++#define bit_SSE4_1 (1 << 19)
++#endif
++
++#ifndef bit_SSE4_2
++#define bit_SSE4_2 (1 << 20)
++#endif
++
++#ifndef bit_OSXSAVE
++#define bit_OSXSAVE (1 << 27)
++#endif
++
++#ifndef bit_AVX
++#define bit_AVX (1 << 28)
++#endif
++
++#ifndef bit_F16C
++#define bit_F16C (1 << 29)
++#endif
++
++#ifndef bit_AVX2
++#define bit_AVX2 (1<<5)
++#endif
++
++#define xgetbv(index, eax, edx) \
++ __asm__ ("xgetbv" : "=a"(eax), "=d"(edx) : "c" (index))
++
++#define has_YMM 0x1
++
++static inline unsigned igt_x86_features(void)
++{
++ unsigned max = __get_cpuid_max(BASIC_CPUID, 0);
++ unsigned eax, ebx, ecx, edx;
++ unsigned features = 0;
++ unsigned extra = 0;
++
++ if (max >= 1) {
++ __cpuid(1, eax, ebx, ecx, edx);
++
++ if (ecx & bit_SSE3)
++ features |= SSE3;
++
++ if (ecx & bit_SSSE3)
++ features |= SSSE3;
++
++ if (ecx & bit_SSE4_1)
++ features |= SSE4_1;
++
++ if (ecx & bit_SSE4_2)
++ features |= SSE4_2;
++
++ if (ecx & bit_OSXSAVE) {
++ unsigned int bv_eax, bv_ecx;
++
++ xgetbv(0, bv_eax, bv_ecx);
++ if ((bv_eax & 6) == 6)
++ extra |= has_YMM;
++ }
++
++ if ((extra & has_YMM) && (ecx & bit_AVX))
++ features |= AVX;
++
++ if (edx & bit_MMX)
++ features |= MMX;
++
++ if (edx & bit_SSE)
++ features |= SSE;
++
++ if (edx & bit_SSE2)
++ features |= SSE2;
++
++ if (ecx & bit_F16C)
++ features |= F16C;
++ }
++
++ if (max >= 7) {
++ __cpuid_count(7, 0, eax, ebx, ecx, edx);
++
++ if ((extra & has_YMM) && (ebx & bit_AVX2))
++ features |= AVX2;
++ }
++
++ return features;
++}
++
+ char *igt_x86_features_to_string(unsigned features, char *line);
+ #else
+ static inline unsigned igt_x86_features(void)
+--
+2.43.2
+