diff options
Diffstat (limited to 'x11-apps/igt-gpu-tools/files/1.28-lib-Inline-igt_x86_features-into-ifunc-resolvers.patch')
-rw-r--r-- | x11-apps/igt-gpu-tools/files/1.28-lib-Inline-igt_x86_features-into-ifunc-resolvers.patch | 354 |
1 files changed, 354 insertions, 0 deletions
diff --git a/x11-apps/igt-gpu-tools/files/1.28-lib-Inline-igt_x86_features-into-ifunc-resolvers.patch b/x11-apps/igt-gpu-tools/files/1.28-lib-Inline-igt_x86_features-into-ifunc-resolvers.patch new file mode 100644 index 000000000000..e1239ff11a2c --- /dev/null +++ b/x11-apps/igt-gpu-tools/files/1.28-lib-Inline-igt_x86_features-into-ifunc-resolvers.patch @@ -0,0 +1,354 @@ +From cdf2915c810e77e1b77071e46acd6d9c696c4473 Mon Sep 17 00:00:00 2001 +From: Matt Turner <mattst88@gmail.com> +Date: Thu, 21 Mar 2024 14:41:50 -0400 +Subject: [PATCH] lib: Inline igt_x86_features() into ifunc resolvers + +Quoting https://sourceware.org/glibc/wiki/GNU_IFUNC + +> When LD_BIND_NOW=1 or -Wl,z,now is in effect symbols must be +> immediately resolved at startup. In cases where an external function +> call depends needs to be made that may fail if such a call has not +> been initialized yet (PLT-based relocation which is processed later). +> For example calling strlen in an IFUNC resolver built with -Wl,z,now +> may lead to a segfault because the PLT is not yet resolved. + +We cannot rely on function calls through the PLT in ifunc resolvers as +the PLT may not have been initialized yet. + +In practice, this causes crashes when igt is linked with -Wl,-z,now or +when linked with the mold linker. + +To avoid this problem, we do two things: + 1. move igt_x86_features() to igt_x86.h so its definition is + available to compilation units that call the function. + 2. mark the ifunc resolvers with __attribute__((flatten)) to ensure + igt_x86_features() is inlined. Since this function is only called + from a few places it does not significantly increase binary size + to inline it. + +Linux distros (at least Fedora since v23, Gentoo/Hardened, soon standard +Gentoo) use `-Wl,-z now` to improve security. By binding upfront, the +loader can mark the GOT as read-only for a security enhancement. See +https://wiki.gentoo.org/wiki/Hardened/Toolchain for more details. + +[Fixed whitespaces (Kamil)] +Bug: https://bugs.gentoo.org/788625 +Bug: https://bugs.gentoo.org/925348 +Reviewed-by: Zbigniew KempczyĆski <zbigniew.kempczynski@intel.com> +Signed-off-by: Matt Turner <mattst88@gmail.com> +--- + lib/igt_halffloat.c | 8 +++ + lib/igt_x86.c | 119 ++------------------------------------------ + lib/igt_x86.h | 118 ++++++++++++++++++++++++++++++++++++++++++- + 3 files changed, 129 insertions(+), 116 deletions(-) + +diff --git a/lib/igt_halffloat.c b/lib/igt_halffloat.c +index 5dbe08e01..560952d20 100644 +--- a/lib/igt_halffloat.c ++++ b/lib/igt_halffloat.c +@@ -194,6 +194,10 @@ static void half_to_float(const uint16_t *h, float *f, unsigned int num) + f[i] = _half_to_float(h[i]); + } + ++/* The PLT is not initialized when ifunc resolvers run, so all external ++ * functions must be inlined with __attribute__((flatten)). ++ */ ++__attribute__((flatten)) + static void (*resolve_float_to_half(void))(const float *f, uint16_t *h, unsigned int num) + { + if (igt_x86_features() & F16C) +@@ -205,6 +209,10 @@ static void (*resolve_float_to_half(void))(const float *f, uint16_t *h, unsigned + void igt_float_to_half(const float *f, uint16_t *h, unsigned int num) + __attribute__((ifunc("resolve_float_to_half"))); + ++/* The PLT is not initialized when ifunc resolvers run, so all external ++ * functions must be inlined with __attribute__((flatten)). ++ */ ++__attribute__((flatten)) + static void (*resolve_half_to_float(void))(const uint16_t *h, float *f, unsigned int num) + { + if (igt_x86_features() & F16C) +diff --git a/lib/igt_x86.c b/lib/igt_x86.c +index 8c102fd13..4842164e9 100644 +--- a/lib/igt_x86.c ++++ b/lib/igt_x86.c +@@ -27,14 +27,6 @@ + + #include "config.h" + +-#ifdef HAVE_CPUID_H +-#include <cpuid.h> +-#else +-#define __get_cpuid_max(x, y) 0 +-#define __cpuid(level, a, b, c, d) a = b = c = d = 0 +-#define __cpuid_count(level, count, a, b, c, d) a = b = c = d = 0 +-#endif +- + #include "igt_x86.h" + #include "igt_aux.h" + +@@ -49,114 +41,7 @@ + * @include: igt_x86.h + */ + +-#define BASIC_CPUID 0x0 +-#define EXTENDED_CPUID 0x80000000 +- +-#ifndef bit_MMX +-#define bit_MMX (1 << 23) +-#endif +- +-#ifndef bit_SSE +-#define bit_SSE (1 << 25) +-#endif +- +-#ifndef bit_SSE2 +-#define bit_SSE2 (1 << 26) +-#endif +- +-#ifndef bit_SSE3 +-#define bit_SSE3 (1 << 0) +-#endif +- +-#ifndef bit_SSSE3 +-#define bit_SSSE3 (1 << 9) +-#endif +- +-#ifndef bit_SSE4_1 +-#define bit_SSE4_1 (1 << 19) +-#endif +- +-#ifndef bit_SSE4_2 +-#define bit_SSE4_2 (1 << 20) +-#endif +- +-#ifndef bit_OSXSAVE +-#define bit_OSXSAVE (1 << 27) +-#endif +- +-#ifndef bit_AVX +-#define bit_AVX (1 << 28) +-#endif +- +-#ifndef bit_F16C +-#define bit_F16C (1 << 29) +-#endif +- +-#ifndef bit_AVX2 +-#define bit_AVX2 (1<<5) +-#endif +- +-#define xgetbv(index,eax,edx) \ +- __asm__ ("xgetbv" : "=a"(eax), "=d"(edx) : "c" (index)) +- +-#define has_YMM 0x1 +- + #if defined(__x86_64__) || defined(__i386__) +-unsigned igt_x86_features(void) +-{ +- unsigned max = __get_cpuid_max(BASIC_CPUID, 0); +- unsigned eax, ebx, ecx, edx; +- unsigned features = 0; +- unsigned extra = 0; +- +- if (max >= 1) { +- __cpuid(1, eax, ebx, ecx, edx); +- +- if (ecx & bit_SSE3) +- features |= SSE3; +- +- if (ecx & bit_SSSE3) +- features |= SSSE3; +- +- if (ecx & bit_SSE4_1) +- features |= SSE4_1; +- +- if (ecx & bit_SSE4_2) +- features |= SSE4_2; +- +- if (ecx & bit_OSXSAVE) { +- unsigned int bv_eax, bv_ecx; +- xgetbv(0, bv_eax, bv_ecx); +- if ((bv_eax & 6) == 6) +- extra |= has_YMM; +- } +- +- if ((extra & has_YMM) && (ecx & bit_AVX)) +- features |= AVX; +- +- if (edx & bit_MMX) +- features |= MMX; +- +- if (edx & bit_SSE) +- features |= SSE; +- +- if (edx & bit_SSE2) +- features |= SSE2; +- +- if (ecx & bit_F16C) +- features |= F16C; +- } +- +- if (max >= 7) { +- __cpuid_count(7, 0, eax, ebx, ecx, edx); +- +- if ((extra & has_YMM) && (ebx & bit_AVX2)) +- features |= AVX2; +- } +- +- return features; +-} +- + char *igt_x86_features_to_string(unsigned features, char *line) + { + char *ret = line; +@@ -284,6 +169,10 @@ static void memcpy_from_wc(void *dst, const void *src, unsigned long len) + memcpy(dst, src, len); + } + ++/* The PLT is not initialized when ifunc resolvers run, so all external ++ * functions must be inlined with __attribute__((flatten)). ++ */ ++__attribute__((flatten)) + static void (*resolve_memcpy_from_wc(void))(void *, const void *, unsigned long) + { + if (igt_x86_features() & SSE4_1) +diff --git a/lib/igt_x86.h b/lib/igt_x86.h +index c7b84dec2..f1df4c1a9 100644 +--- a/lib/igt_x86.h ++++ b/lib/igt_x86.h +@@ -30,6 +30,14 @@ + #ifndef IGT_X86_H + #define IGT_X86_H + ++#ifdef HAVE_CPUID_H ++#include <cpuid.h> ++#else ++#define __get_cpuid_max(x, y) 0 ++#define __cpuid(level, a, b, c, d) a = b = c = d = 0 ++#define __cpuid_count(level, count, a, b, c, d) a = b = c = d = 0 ++#endif ++ + #define MMX 0x1 + #define SSE 0x2 + #define SSE2 0x4 +@@ -42,7 +50,115 @@ + #define F16C 0x200 + + #if defined(__x86_64__) || defined(__i386__) +-unsigned igt_x86_features(void); ++ ++#define BASIC_CPUID 0x0 ++#define EXTENDED_CPUID 0x80000000 ++ ++#ifndef bit_MMX ++#define bit_MMX (1 << 23) ++#endif ++ ++#ifndef bit_SSE ++#define bit_SSE (1 << 25) ++#endif ++ ++#ifndef bit_SSE2 ++#define bit_SSE2 (1 << 26) ++#endif ++ ++#ifndef bit_SSE3 ++#define bit_SSE3 (1 << 0) ++#endif ++ ++#ifndef bit_SSSE3 ++#define bit_SSSE3 (1 << 9) ++#endif ++ ++#ifndef bit_SSE4_1 ++#define bit_SSE4_1 (1 << 19) ++#endif ++ ++#ifndef bit_SSE4_2 ++#define bit_SSE4_2 (1 << 20) ++#endif ++ ++#ifndef bit_OSXSAVE ++#define bit_OSXSAVE (1 << 27) ++#endif ++ ++#ifndef bit_AVX ++#define bit_AVX (1 << 28) ++#endif ++ ++#ifndef bit_F16C ++#define bit_F16C (1 << 29) ++#endif ++ ++#ifndef bit_AVX2 ++#define bit_AVX2 (1<<5) ++#endif ++ ++#define xgetbv(index, eax, edx) \ ++ __asm__ ("xgetbv" : "=a"(eax), "=d"(edx) : "c" (index)) ++ ++#define has_YMM 0x1 ++ ++static inline unsigned igt_x86_features(void) ++{ ++ unsigned max = __get_cpuid_max(BASIC_CPUID, 0); ++ unsigned eax, ebx, ecx, edx; ++ unsigned features = 0; ++ unsigned extra = 0; ++ ++ if (max >= 1) { ++ __cpuid(1, eax, ebx, ecx, edx); ++ ++ if (ecx & bit_SSE3) ++ features |= SSE3; ++ ++ if (ecx & bit_SSSE3) ++ features |= SSSE3; ++ ++ if (ecx & bit_SSE4_1) ++ features |= SSE4_1; ++ ++ if (ecx & bit_SSE4_2) ++ features |= SSE4_2; ++ ++ if (ecx & bit_OSXSAVE) { ++ unsigned int bv_eax, bv_ecx; ++ ++ xgetbv(0, bv_eax, bv_ecx); ++ if ((bv_eax & 6) == 6) ++ extra |= has_YMM; ++ } ++ ++ if ((extra & has_YMM) && (ecx & bit_AVX)) ++ features |= AVX; ++ ++ if (edx & bit_MMX) ++ features |= MMX; ++ ++ if (edx & bit_SSE) ++ features |= SSE; ++ ++ if (edx & bit_SSE2) ++ features |= SSE2; ++ ++ if (ecx & bit_F16C) ++ features |= F16C; ++ } ++ ++ if (max >= 7) { ++ __cpuid_count(7, 0, eax, ebx, ecx, edx); ++ ++ if ((extra & has_YMM) && (ebx & bit_AVX2)) ++ features |= AVX2; ++ } ++ ++ return features; ++} ++ + char *igt_x86_features_to_string(unsigned features, char *line); + #else + static inline unsigned igt_x86_features(void) +-- +2.43.2 + |