Diffstat (limited to 'sys-kernel/linux-image-redcore/files/5.5-linux-hardened.patch')
-rw-r--r-- | sys-kernel/linux-image-redcore/files/5.5-linux-hardened.patch | 3884 |
1 file changed, 3884 insertions, 0 deletions
diff --git a/sys-kernel/linux-image-redcore/files/5.5-linux-hardened.patch b/sys-kernel/linux-image-redcore/files/5.5-linux-hardened.patch new file mode 100644 index 00000000..ab28838a --- /dev/null +++ b/sys-kernel/linux-image-redcore/files/5.5-linux-hardened.patch @@ -0,0 +1,3884 @@ +diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt +index ade4e6ec23e0..0d83381ec718 100644 +--- a/Documentation/admin-guide/kernel-parameters.txt ++++ b/Documentation/admin-guide/kernel-parameters.txt +@@ -503,16 +503,6 @@ + nosocket -- Disable socket memory accounting. + nokmem -- Disable kernel memory accounting. + +- checkreqprot [SELINUX] Set initial checkreqprot flag value. +- Format: { "0" | "1" } +- See security/selinux/Kconfig help text. +- 0 -- check protection applied by kernel (includes +- any implied execute protection). +- 1 -- check protection requested by application. +- Default value is set via a kernel config option. +- Value can be changed at runtime via +- /selinux/checkreqprot. +- + cio_ignore= [S390] + See Documentation/s390/common_io.rst for details. + clk_ignore_unused +@@ -3360,6 +3350,11 @@ + the specified number of seconds. This is to be used if + your oopses keep scrolling off the screen. + ++ extra_latent_entropy ++ Enable a very simple form of latent entropy extraction ++ from the first 4GB of memory as the bootmem allocator ++ passes the memory pages to the buddy allocator. ++ + pcbit= [HW,ISDN] + + pcd. [PARIDE] +diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst +index def074807cee..8770b4bc20f2 100644 +--- a/Documentation/admin-guide/sysctl/kernel.rst ++++ b/Documentation/admin-guide/sysctl/kernel.rst +@@ -102,6 +102,7 @@ show up in /proc/sys/kernel: + - sysctl_writes_strict + - tainted ==> Documentation/admin-guide/tainted-kernels.rst + - threads-max ++- tiocsti_restrict + - unknown_nmi_panic + - watchdog + - watchdog_thresh +@@ -1112,6 +1113,25 @@ If a value outside of this range is written to threads-max an error + EINVAL occurs. + + ++tiocsti_restrict: ++================= ++ ++This toggle indicates whether unprivileged users are prevented from using the ++TIOCSTI ioctl to inject commands into other processes which share a tty ++session. ++ ++When tiocsti_restrict is set to (0) there are no restrictions (except the ++default restriction of only being able to inject commands into one's own ++tty). When tiocsti_restrict is set to (1), users must have CAP_SYS_ADMIN to ++use the TIOCSTI ioctl. ++ ++When user namespaces are in use, the check for the capability CAP_SYS_ADMIN is ++done against the user namespace that originally opened the tty. ++ ++The kernel config option CONFIG_SECURITY_TIOCSTI_RESTRICT sets the default ++value of tiocsti_restrict.
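As an illustration (not part of the patch itself), this is roughly the TIOCSTI command-injection primitive that the tiocsti_restrict sysctl documented above guards against. A minimal userspace sketch, assuming a glibc/Linux build environment; the file name and the choice of stdin as the target tty are illustrative only:

```c
/* tiocsti-demo.c -- sketch of the injection that tiocsti_restrict blocks.
 * Hypothetical demo, not part of the patch.
 * Build: cc -o tiocsti-demo tiocsti-demo.c
 */
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>   /* defines TIOCSTI on Linux */

int main(void)
{
    const char *cmd = "id\n";

    /* Each TIOCSTI ioctl pushes one byte into the tty input queue,
     * as if it had been typed at the keyboard. */
    for (size_t i = 0; i < strlen(cmd); i++) {
        if (ioctl(0, TIOCSTI, &cmd[i]) < 0) {
            /* With tiocsti_restrict=1 and no CAP_SYS_ADMIN, the patched
             * kernel rejects this call with EPERM. */
            perror("ioctl(TIOCSTI)");
            return 1;
        }
    }
    return 0;
}
```

Run against the caller's own tty this merely "types" id; the risk the sysctl addresses is the same call made on a tty file descriptor shared across a privilege boundary (e.g. after su into a less-privileged session).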
++ ++ + unknown_nmi_panic: + ================== + +diff --git a/arch/Kconfig b/arch/Kconfig +index 5e907a954532..d14dcf0e7821 100644 +--- a/arch/Kconfig ++++ b/arch/Kconfig +@@ -653,7 +653,7 @@ config ARCH_MMAP_RND_BITS + int "Number of bits to use for ASLR of mmap base address" if EXPERT + range ARCH_MMAP_RND_BITS_MIN ARCH_MMAP_RND_BITS_MAX + default ARCH_MMAP_RND_BITS_DEFAULT if ARCH_MMAP_RND_BITS_DEFAULT +- default ARCH_MMAP_RND_BITS_MIN ++ default ARCH_MMAP_RND_BITS_MAX + depends on HAVE_ARCH_MMAP_RND_BITS + help + This value can be used to select the number of bits to use to +@@ -687,7 +687,7 @@ config ARCH_MMAP_RND_COMPAT_BITS + int "Number of bits to use for ASLR of mmap base address for compatible applications" if EXPERT + range ARCH_MMAP_RND_COMPAT_BITS_MIN ARCH_MMAP_RND_COMPAT_BITS_MAX + default ARCH_MMAP_RND_COMPAT_BITS_DEFAULT if ARCH_MMAP_RND_COMPAT_BITS_DEFAULT +- default ARCH_MMAP_RND_COMPAT_BITS_MIN ++ default ARCH_MMAP_RND_COMPAT_BITS_MAX + depends on HAVE_ARCH_MMAP_RND_COMPAT_BITS + help + This value can be used to select the number of bits to use to +diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig +index e688dfad0b72..57d51996053f 100644 +--- a/arch/arm64/Kconfig ++++ b/arch/arm64/Kconfig +@@ -1185,6 +1185,7 @@ config RODATA_FULL_DEFAULT_ENABLED + + config ARM64_SW_TTBR0_PAN + bool "Emulate Privileged Access Never using TTBR0_EL1 switching" ++ default y + help + Enabling this option prevents the kernel from accessing + user-space memory directly by pointing TTBR0_EL1 to a reserved +@@ -1584,6 +1585,7 @@ config RANDOMIZE_BASE + bool "Randomize the address of the kernel image" + select ARM64_MODULE_PLTS if MODULES + select RELOCATABLE ++ default y + help + Randomizes the virtual address at which the kernel image is + loaded, as a security feature that deters exploit attempts +diff --git a/arch/arm64/Kconfig.debug b/arch/arm64/Kconfig.debug +index cf09010d825f..dc4083ceff57 100644 +--- a/arch/arm64/Kconfig.debug ++++ b/arch/arm64/Kconfig.debug +@@ -43,6 +43,7 @@ config ARM64_RANDOMIZE_TEXT_OFFSET + config DEBUG_WX + bool "Warn on W+X mappings at boot" + select ARM64_PTDUMP_CORE ++ default y + ---help--- + Generate a warning if any W+X mappings are found at boot. + +diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig +index 6a83ba2aea3e..5aac62b75843 100644 +--- a/arch/arm64/configs/defconfig ++++ b/arch/arm64/configs/defconfig +@@ -1,4 +1,3 @@ +-CONFIG_SYSVIPC=y + CONFIG_POSIX_MQUEUE=y + CONFIG_AUDIT=y + CONFIG_NO_HZ_IDLE=y +diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h +index b618017205a3..0a228dbcad65 100644 +--- a/arch/arm64/include/asm/elf.h ++++ b/arch/arm64/include/asm/elf.h +@@ -103,14 +103,10 @@ + + /* + * This is the base location for PIE (ET_DYN with INTERP) loads. On +- * 64-bit, this is above 4GB to leave the entire 32-bit address ++ * 64-bit, this is raised to 4GB to leave the entire 32-bit address + * space open for things that want to use the area for 32-bit pointers. + */ +-#ifdef CONFIG_ARM64_FORCE_52BIT +-#define ELF_ET_DYN_BASE (2 * TASK_SIZE_64 / 3) +-#else +-#define ELF_ET_DYN_BASE (2 * DEFAULT_MAP_WINDOW_64 / 3) +-#endif /* CONFIG_ARM64_FORCE_52BIT */ ++#define ELF_ET_DYN_BASE 0x100000000UL + + #ifndef __ASSEMBLY__ + +@@ -164,10 +160,10 @@ extern int arch_setup_additional_pages(struct linux_binprm *bprm, + /* 1GB of VA */ + #ifdef CONFIG_COMPAT + #define STACK_RND_MASK (test_thread_flag(TIF_32BIT) ? 
\ +- 0x7ff >> (PAGE_SHIFT - 12) : \ +- 0x3ffff >> (PAGE_SHIFT - 12)) ++ ((1UL << mmap_rnd_compat_bits) - 1) >> (PAGE_SHIFT - 12) : \ ++ ((1UL << mmap_rnd_bits) - 1) >> (PAGE_SHIFT - 12)) + #else +-#define STACK_RND_MASK (0x3ffff >> (PAGE_SHIFT - 12)) ++#define STACK_RND_MASK (((1UL << mmap_rnd_bits) - 1) >> (PAGE_SHIFT - 12)) + #endif + + #ifdef __AARCH64EB__ +diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig +index 5e8949953660..90fa4d372263 100644 +--- a/arch/x86/Kconfig ++++ b/arch/x86/Kconfig +@@ -1189,8 +1189,7 @@ config VM86 + default X86_LEGACY_VM86 + + config X86_16BIT +- bool "Enable support for 16-bit segments" if EXPERT +- default y ++ bool "Enable support for 16-bit segments" + depends on MODIFY_LDT_SYSCALL + ---help--- + This option is required by programs like Wine to run 16-bit +@@ -2354,7 +2353,7 @@ config COMPAT_VDSO + choice + prompt "vsyscall table for legacy applications" + depends on X86_64 +- default LEGACY_VSYSCALL_XONLY ++ default LEGACY_VSYSCALL_NONE + help + Legacy user code that does not know how to find the vDSO expects + to be able to issue three syscalls by calling fixed addresses in +@@ -2450,8 +2449,7 @@ config CMDLINE_OVERRIDE + be set to 'N' under normal conditions. + + config MODIFY_LDT_SYSCALL +- bool "Enable the LDT (local descriptor table)" if EXPERT +- default y ++ bool "Enable the LDT (local descriptor table)" + ---help--- + Linux can allow user programs to install a per-process x86 + Local Descriptor Table (LDT) using the modify_ldt(2) system +diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug +index c4eab8ed33a3..4883c77fdc55 100644 +--- a/arch/x86/Kconfig.debug ++++ b/arch/x86/Kconfig.debug +@@ -91,6 +91,7 @@ config EFI_PGT_DUMP + config DEBUG_WX + bool "Warn on W+X mappings at boot" + select X86_PTDUMP_CORE ++ default y + ---help--- + Generate a warning if any W+X mappings are found at boot. + +diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig +index 0b9654c7a05c..4fdb04daf3dc 100644 +--- a/arch/x86/configs/x86_64_defconfig ++++ b/arch/x86/configs/x86_64_defconfig +@@ -1,5 +1,4 @@ + # CONFIG_LOCALVERSION_AUTO is not set +-CONFIG_SYSVIPC=y + CONFIG_POSIX_MQUEUE=y + CONFIG_BSD_PROCESS_ACCT=y + CONFIG_TASKSTATS=y +diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c +index f5937742b290..6655ce228e25 100644 +--- a/arch/x86/entry/vdso/vma.c ++++ b/arch/x86/entry/vdso/vma.c +@@ -198,55 +198,9 @@ static int map_vdso(const struct vdso_image *image, unsigned long addr) + } + + #ifdef CONFIG_X86_64 +-/* +- * Put the vdso above the (randomized) stack with another randomized +- * offset. This way there is no hole in the middle of address space. +- * To save memory make sure it is still in the same PTE as the stack +- * top. This doesn't give that many random bits. +- * +- * Note that this algorithm is imperfect: the distribution of the vdso +- * start address within a PMD is biased toward the end. +- * +- * Only used for the 64-bit and x32 vdsos. +- */ +-static unsigned long vdso_addr(unsigned long start, unsigned len) +-{ +- unsigned long addr, end; +- unsigned offset; +- +- /* +- * Round up the start address. It can start out unaligned as a result +- * of stack start randomization. +- */ +- start = PAGE_ALIGN(start); +- +- /* Round the lowest possible end address up to a PMD boundary. 
*/ +- end = (start + len + PMD_SIZE - 1) & PMD_MASK; +- if (end >= TASK_SIZE_MAX) +- end = TASK_SIZE_MAX; +- end -= len; +- +- if (end > start) { +- offset = get_random_int() % (((end - start) >> PAGE_SHIFT) + 1); +- addr = start + (offset << PAGE_SHIFT); +- } else { +- addr = start; +- } +- +- /* +- * Forcibly align the final address in case we have a hardware +- * issue that requires alignment for performance reasons. +- */ +- addr = align_vdso_addr(addr); +- +- return addr; +-} +- + static int map_vdso_randomized(const struct vdso_image *image) + { +- unsigned long addr = vdso_addr(current->mm->start_stack, image->size-image->sym_vvar_start); +- +- return map_vdso(image, addr); ++ return map_vdso(image, 0); + } + #endif + +diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h +index 69c0f892e310..f9f7a85bb71e 100644 +--- a/arch/x86/include/asm/elf.h ++++ b/arch/x86/include/asm/elf.h +@@ -248,11 +248,11 @@ extern int force_personality32; + + /* + * This is the base location for PIE (ET_DYN with INTERP) loads. On +- * 64-bit, this is above 4GB to leave the entire 32-bit address ++ * 64-bit, this is raised to 4GB to leave the entire 32-bit address + * space open for things that want to use the area for 32-bit pointers. + */ + #define ELF_ET_DYN_BASE (mmap_is_ia32() ? 0x000400000UL : \ +- (DEFAULT_MAP_WINDOW / 3 * 2)) ++ 0x100000000UL) + + /* This yields a mask that user programs can use to figure out what + instruction set this CPU supports. This could be done in user space, +@@ -312,8 +312,8 @@ extern bool mmap_address_hint_valid(unsigned long addr, unsigned long len); + + #ifdef CONFIG_X86_32 + +-#define __STACK_RND_MASK(is32bit) (0x7ff) +-#define STACK_RND_MASK (0x7ff) ++#define __STACK_RND_MASK(is32bit) ((1UL << mmap_rnd_bits) - 1) ++#define STACK_RND_MASK ((1UL << mmap_rnd_bits) - 1) + + #define ARCH_DLINFO ARCH_DLINFO_IA32 + +@@ -322,7 +322,11 @@ extern bool mmap_address_hint_valid(unsigned long addr, unsigned long len); + #else /* CONFIG_X86_32 */ + + /* 1GB for 64bit, 8MB for 32bit */ +-#define __STACK_RND_MASK(is32bit) ((is32bit) ? 0x7ff : 0x3fffff) ++#ifdef CONFIG_COMPAT ++#define __STACK_RND_MASK(is32bit) ((is32bit) ? 
(1UL << mmap_rnd_compat_bits) - 1 : (1UL << mmap_rnd_bits) - 1) ++#else ++#define __STACK_RND_MASK(is32bit) ((1UL << mmap_rnd_bits) - 1) ++#endif + #define STACK_RND_MASK __STACK_RND_MASK(mmap_is_ia32()) + + #define ARCH_DLINFO \ +@@ -380,5 +384,4 @@ struct va_alignment { + } ____cacheline_aligned; + + extern struct va_alignment va_align; +-extern unsigned long align_vdso_addr(unsigned long); + #endif /* _ASM_X86_ELF_H */ +diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h +index 6f66d841262d..b786e7cb395d 100644 +--- a/arch/x86/include/asm/tlbflush.h ++++ b/arch/x86/include/asm/tlbflush.h +@@ -295,6 +295,7 @@ static inline void cr4_set_bits_irqsoff(unsigned long mask) + unsigned long cr4; + + cr4 = this_cpu_read(cpu_tlbstate.cr4); ++ BUG_ON(cr4 != __read_cr4()); + if ((cr4 | mask) != cr4) + __cr4_set(cr4 | mask); + } +@@ -305,6 +306,7 @@ static inline void cr4_clear_bits_irqsoff(unsigned long mask) + unsigned long cr4; + + cr4 = this_cpu_read(cpu_tlbstate.cr4); ++ BUG_ON(cr4 != __read_cr4()); + if ((cr4 & ~mask) != cr4) + __cr4_set(cr4 & ~mask); + } +@@ -334,6 +336,7 @@ static inline void cr4_toggle_bits_irqsoff(unsigned long mask) + unsigned long cr4; + + cr4 = this_cpu_read(cpu_tlbstate.cr4); ++ BUG_ON(cr4 != __read_cr4()); + __cr4_set(cr4 ^ mask); + } + +@@ -440,6 +443,7 @@ static inline void __native_flush_tlb_global(void) + raw_local_irq_save(flags); + + cr4 = this_cpu_read(cpu_tlbstate.cr4); ++ BUG_ON(cr4 != __read_cr4()); + /* toggle PGE */ + native_write_cr4(cr4 ^ X86_CR4_PGE); + /* write old PGE again and flush TLBs */ +diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c +index 61e93a318983..db75c5ddbfc4 100644 +--- a/arch/x86/kernel/process.c ++++ b/arch/x86/kernel/process.c +@@ -43,6 +43,8 @@ + #include <asm/spec-ctrl.h> + #include <asm/io_bitmap.h> + #include <asm/proto.h> ++#include <asm/elf.h> ++#include <linux/sizes.h> + + #include "process.h" + +@@ -917,7 +919,10 @@ unsigned long arch_align_stack(unsigned long sp) + + unsigned long arch_randomize_brk(struct mm_struct *mm) + { +- return randomize_page(mm->brk, 0x02000000); ++ if (mmap_is_ia32()) ++ return mm->brk + get_random_long() % SZ_32M + PAGE_SIZE; ++ else ++ return mm->brk + get_random_long() % SZ_1G + PAGE_SIZE; + } + + /* +diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c +index f7476ce23b6e..652169a2b23a 100644 +--- a/arch/x86/kernel/sys_x86_64.c ++++ b/arch/x86/kernel/sys_x86_64.c +@@ -54,13 +54,6 @@ static unsigned long get_align_bits(void) + return va_align.bits & get_align_mask(); + } + +-unsigned long align_vdso_addr(unsigned long addr) +-{ +- unsigned long align_mask = get_align_mask(); +- addr = (addr + align_mask) & ~align_mask; +- return addr | get_align_bits(); +-} +- + static int __init control_va_addr_alignment(char *str) + { + /* guard against enabling this on other CPU families */ +@@ -122,10 +115,7 @@ static void find_start_end(unsigned long addr, unsigned long flags, + } + + *begin = get_mmap_base(1); +- if (in_32bit_syscall()) +- *end = task_size_32bit(); +- else +- *end = task_size_64bit(addr > DEFAULT_MAP_WINDOW); ++ *end = get_mmap_base(0); + } + + unsigned long +@@ -210,7 +200,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, + + info.flags = VM_UNMAPPED_AREA_TOPDOWN; + info.length = len; +- info.low_limit = PAGE_SIZE; ++ info.low_limit = get_mmap_base(1); + info.high_limit = get_mmap_base(0); + + /* +diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c +index 0a74407ef92e..5ceff405c81c 
100644 +--- a/arch/x86/mm/init_32.c ++++ b/arch/x86/mm/init_32.c +@@ -560,9 +560,9 @@ static void __init pagetable_init(void) + + #define DEFAULT_PTE_MASK ~(_PAGE_NX | _PAGE_GLOBAL) + /* Bits supported by the hardware: */ +-pteval_t __supported_pte_mask __read_mostly = DEFAULT_PTE_MASK; ++pteval_t __supported_pte_mask __ro_after_init = DEFAULT_PTE_MASK; + /* Bits allowed in normal kernel mappings: */ +-pteval_t __default_kernel_pte_mask __read_mostly = DEFAULT_PTE_MASK; ++pteval_t __default_kernel_pte_mask __ro_after_init = DEFAULT_PTE_MASK; + EXPORT_SYMBOL_GPL(__supported_pte_mask); + /* Used in PAGE_KERNEL_* macros which are reasonably used out-of-tree: */ + EXPORT_SYMBOL(__default_kernel_pte_mask); +diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c +index bcfede46fe02..e6d649daa4e6 100644 +--- a/arch/x86/mm/init_64.c ++++ b/arch/x86/mm/init_64.c +@@ -97,9 +97,9 @@ DEFINE_ENTRY(pte, pte, init) + */ + + /* Bits supported by the hardware: */ +-pteval_t __supported_pte_mask __read_mostly = ~0; ++pteval_t __supported_pte_mask __ro_after_init = ~0; + /* Bits allowed in normal kernel mappings: */ +-pteval_t __default_kernel_pte_mask __read_mostly = ~0; ++pteval_t __default_kernel_pte_mask __ro_after_init = ~0; + EXPORT_SYMBOL_GPL(__supported_pte_mask); + /* Used in PAGE_KERNEL_* macros which are reasonably used out-of-tree: */ + EXPORT_SYMBOL(__default_kernel_pte_mask); +diff --git a/block/blk-softirq.c b/block/blk-softirq.c +index 6e7ec87d49fa..d6ee3f8b3e74 100644 +--- a/block/blk-softirq.c ++++ b/block/blk-softirq.c +@@ -20,7 +20,7 @@ static DEFINE_PER_CPU(struct list_head, blk_cpu_done); + * Softirq action handler - move entries to local list and loop over them + * while passing them to the queue registered handler. + */ +-static __latent_entropy void blk_done_softirq(struct softirq_action *h) ++static __latent_entropy void blk_done_softirq(void) + { + struct list_head *cpu_list, local_list; + +diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c +index 42c8728f6117..69c425cf0101 100644 +--- a/drivers/ata/libata-core.c ++++ b/drivers/ata/libata-core.c +@@ -5146,7 +5146,7 @@ void ata_qc_free(struct ata_queued_cmd *qc) + struct ata_port *ap; + unsigned int tag; + +- WARN_ON_ONCE(qc == NULL); /* ata_qc_from_tag _might_ return NULL */ ++ BUG_ON(qc == NULL); /* ata_qc_from_tag _might_ return NULL */ + ap = qc->ap; + + qc->flags = 0; +@@ -5163,7 +5163,7 @@ void __ata_qc_complete(struct ata_queued_cmd *qc) + struct ata_port *ap; + struct ata_link *link; + +- WARN_ON_ONCE(qc == NULL); /* ata_qc_from_tag _might_ return NULL */ ++ BUG_ON(qc == NULL); /* ata_qc_from_tag _might_ return NULL */ + WARN_ON_ONCE(!(qc->flags & ATA_QCFLAG_ACTIVE)); + ap = qc->ap; + link = qc->dev->link; +diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig +index 26956c006987..206edc0b60a9 100644 +--- a/drivers/char/Kconfig ++++ b/drivers/char/Kconfig +@@ -9,7 +9,6 @@ source "drivers/tty/Kconfig" + + config DEVMEM + bool "/dev/mem virtual device support" +- default y + help + Say Y here if you want to support the /dev/mem device. + The /dev/mem device is used to access areas of physical +@@ -514,7 +513,6 @@ config TELCLOCK + config DEVPORT + bool "/dev/port character device" + depends on ISA || PCI +- default y + help + Say Y here if you want to support the /dev/port device. The /dev/port + device is similar to /dev/mem, but for I/O ports. 
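As an aside (not part of the patch), the init_32.c/init_64.c hunks above move the PTE masks from __read_mostly to __ro_after_init, and a later hunk aliases __read_only to __ro_after_init in include/linux/cache.h. A sketch of the semantics, assuming built-in (non-modular) kernel code; boot_mask and demo_init are hypothetical names:

```c
/* Sketch of __ro_after_init: the variable lives in .data..ro_after_init,
 * stays writable while initcalls run, and becomes read-only once
 * mark_rodata_ro() runs at the end of boot. Not part of the patch.
 */
#include <linux/cache.h>
#include <linux/init.h>
#include <linux/printk.h>

static unsigned long boot_mask __ro_after_init = ~0UL;

static int __init demo_init(void)
{
    boot_mask &= ~0x4UL;   /* fine: the section is still read-write here */
    pr_info("boot_mask=%lx\n", boot_mask);
    return 0;
}
late_initcall(demo_init);

/* After boot, a write such as `boot_mask = 0;` takes a protection fault --
 * the tamper resistance the patch wants for __supported_pte_mask and
 * __default_kernel_pte_mask. */
```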
+diff --git a/drivers/char/random.c b/drivers/char/random.c +index ea1973d35843..0680d7cda180 100644 +--- a/drivers/char/random.c ++++ b/drivers/char/random.c +@@ -350,11 +350,20 @@ + /* + * Configuration information + */ ++#ifdef CONFIG_HARDENED_RANDOM ++#define INPUT_POOL_SHIFT 18 ++#define OUTPUT_POOL_SHIFT 16 ++#else + #define INPUT_POOL_SHIFT 12 +-#define INPUT_POOL_WORDS (1 << (INPUT_POOL_SHIFT-5)) + #define OUTPUT_POOL_SHIFT 10 ++#endif ++#define INPUT_POOL_WORDS (1 << (INPUT_POOL_SHIFT-5)) + #define OUTPUT_POOL_WORDS (1 << (OUTPUT_POOL_SHIFT-5)) ++#ifdef CONFIG_HARDENED_RANDOM ++#define SEC_XFER_SIZE 32768 ++#else + #define SEC_XFER_SIZE 512 ++#endif + #define EXTRACT_SIZE 10 + + +@@ -363,9 +372,6 @@ + /* + * To allow fractional bits to be tracked, the entropy_count field is + * denominated in units of 1/8th bits. +- * +- * 2*(ENTROPY_SHIFT + poolbitshift) must <= 31, or the multiply in +- * credit_entropy_bits() needs to be 64 bits wide. + */ + #define ENTROPY_SHIFT 3 + #define ENTROPY_BITS(r) ((r)->entropy_count >> ENTROPY_SHIFT) +@@ -428,17 +434,28 @@ static int random_write_wakeup_bits = 28 * OUTPUT_POOL_WORDS; + * polynomial which improves the resulting TGFSR polynomial to be + * irreducible, which we have made here. + */ +-static const struct poolinfo { ++static struct poolinfo { + int poolbitshift, poolwords, poolbytes, poolfracbits; +-#define S(x) ilog2(x)+5, (x), (x)*4, (x) << (ENTROPY_SHIFT+5) +- int tap1, tap2, tap3, tap4, tap5; +-} poolinfo_table[] = { ++#define S(x) \ ++ .poolbitshift = ilog2(x)+5, \ ++ .poolwords = (x), \ ++ .poolbytes = (x)*4, \ ++ .poolfracbits = (x) << (ENTROPY_SHIFT+5) ++ int tap[5]; ++} __randomize_layout poolinfo_table[] = { ++#ifdef CONFIG_HARDENED_RANDOM ++ /* x^8192 + x^104 + x^76 + x^51 +x^25 + x + 1 */ ++ { S(8192), .tap = { 104, 76, 51, 25, 1 } }, ++ /* x^2048 + x^26 + x^19 + x^14 + x^7 + x + 1 */ ++ { S(2048), .tap = { 26, 19, 14, 7, 1 } } ++#else + /* was: x^128 + x^103 + x^76 + x^51 +x^25 + x + 1 */ + /* x^128 + x^104 + x^76 + x^51 +x^25 + x + 1 */ +- { S(128), 104, 76, 51, 25, 1 }, ++ { S(128), .tap = { 104, 76, 51, 25, 1 } }, + /* was: x^32 + x^26 + x^20 + x^14 + x^7 + x + 1 */ + /* x^32 + x^26 + x^19 + x^14 + x^7 + x + 1 */ +- { S(32), 26, 19, 14, 7, 1 }, ++ { S(32), .tap = { 26, 19, 14, 7, 1 } }, ++#endif + #if 0 + /* x^2048 + x^1638 + x^1231 + x^819 + x^411 + x + 1 -- 115 */ + { S(2048), 1638, 1231, 819, 411, 1 }, +@@ -482,7 +499,7 @@ struct crng_state { + __u32 state[16]; + unsigned long init_time; + spinlock_t lock; +-}; ++} __randomize_layout; + + static struct crng_state primary_crng = { + .lock = __SPIN_LOCK_UNLOCKED(primary_crng.lock), +@@ -542,7 +559,7 @@ struct entropy_store { + unsigned int initialized:1; + unsigned int last_data_init:1; + __u8 last_data[EXTRACT_SIZE]; +-}; ++} __randomize_layout; + + static ssize_t extract_entropy(struct entropy_store *r, void *buf, + size_t nbytes, int min, int rsvd); +@@ -553,6 +570,8 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r); + static void push_to_pool(struct work_struct *work); + static __u32 input_pool_data[INPUT_POOL_WORDS] __latent_entropy; + static __u32 blocking_pool_data[OUTPUT_POOL_WORDS] __latent_entropy; ++/* this actually doesn't need latent entropy */ ++static __u32 secondary_xfer_buffer[OUTPUT_POOL_WORDS]; + + static struct entropy_store input_pool = { + .poolinfo = &poolinfo_table[0], +@@ -571,9 +590,78 @@ static struct entropy_store blocking_pool = { + push_to_pool), + }; + ++#ifdef CONFIG_HARDENED_RANDOM ++static __u32 const 
twist_table[64][4] = { ++ { 0x6a09e668, 0xbb67ae86, 0x3c6ef373, 0xa54ff53a }, ++ { 0x510e5280, 0x9b05688c, 0x1f83d9ac, 0x5be0cd19 }, ++ { 0xcbbb9d5e, 0x629a292a, 0x9159015a, 0x152fecd9 }, ++ { 0x67332668, 0x8eb44a87, 0xdb0c2e0d, 0x47b5481e }, ++ { 0xae5f9157, 0xcf6c85d4, 0x2f73477d, 0x6d1826cb }, ++ { 0x8b43d457, 0xe360b597, 0x1c456003, 0x6f196331 }, ++ { 0xd94ebeb2, 0x0cc4a612, 0x261dc1f3, 0x5815a7be }, ++ { 0x70b7ed68, 0xa1513c69, 0x44f93636, 0x720dcdfe }, ++ { 0xb467369e, 0xca320b76, 0x34e0d42e, 0x49c7d9be }, ++ { 0x87abb9f2, 0xc463a2fc, 0xec3fc3f4, 0x27277f6d }, ++ { 0x610bebf3, 0x7420b49f, 0xd1fd8a34, 0xe4773594 }, ++ { 0x092197f6, 0x1b530c96, 0x869d6343, 0xeee52e50 }, ++ { 0x1107668a, 0x21fba37c, 0x43ab9fb6, 0x75a9f91d }, ++ { 0x8630501a, 0xd7cd8174, 0x007fe010, 0x0379f514 }, ++ { 0x066b651b, 0x0764ab84, 0x0a4b06be, 0x0c3578c1 }, ++ { 0x0d2962a5, 0x11e039f4, 0x1857b7bf, 0x1a29bf2e }, ++ { 0x1b11a32f, 0x1cdf34e8, 0x23183042, 0x25b89093 }, ++ { 0x2a0c06a1, 0x2ae79843, 0x2c9cda69, 0x2f281f24 }, ++ { 0x32841259, 0x3502e64e, 0x377c9c21, 0x39204cda }, ++ { 0x3b91bf66, 0x3ecc38ca, 0x40665609, 0x43947938 }, ++ { 0x47830769, 0x484ae4b8, 0x4c2b2b75, 0x4cf03d21 }, ++ { 0x4f3cbb11, 0x50c2d3b5, 0x5308af16, 0x560a7a9a }, ++ { 0x5788d981, 0x584769b4, 0x59c34f06, 0x5e2d564c }, ++ { 0x6116d760, 0x62894c10, 0x6569b58c, 0x66d7b394 }, ++ { 0x68f9f8dc, 0x6d34f03d, 0x6de8372f, 0x742687a4 }, ++ { 0x76356021, 0x799d1235, 0x7ba455f4, 0x7da8d73b }, ++ { 0x7e546743, 0x80554bdc, 0x83a63a3c, 0x85a01e39 }, ++ { 0x879774ac, 0x883eac9f, 0x8a32aae0, 0x8c243210 }, ++ { 0x8d6e8781, 0x8e134b6f, 0x91ea5892, 0x95166fe4 }, ++ { 0x95b817e6, 0x96faa747, 0x98dca135, 0x9abc6593 }, ++ { 0x9b5bd55a, 0x9f136df7, 0xa04ebd79, 0xa225f6ed }, ++ { 0xa4970e49, 0xa79f5a6b, 0xaa0869af, 0xad06dcbd }, ++ { 0xaf68312e, 0xb12efe0b, 0xb2f3ef5b, 0xb420e03a }, ++ { 0xb6785656, 0xb837d738, 0xb9613115, 0xbbb18efb }, ++ { 0xbcd89621, 0xc0db3814, 0xc3b2f2a3, 0xc71638d9 }, ++ { 0xc7a6240f, 0xca73166e, 0xcb01f3ba, 0xcc1f293d }, ++ { 0xccad81c8, 0xcf72acaf, 0xd34c7258, 0xd4649b7a }, ++ { 0xd4f07147, 0xd607a013, 0xd9d3b47b, 0xdae803b5 }, ++ { 0xdb71ef1a, 0xdc854e24, 0xe1dcf0ea, 0xe2eca719 }, ++ { 0xe50a4ad8, 0xe7ac0990, 0xe9c46d3a, 0xeacfc33c }, ++ { 0xec5fb417, 0xedee611c, 0xf18bc533, 0xf292ef77 }, ++ { 0xf41cab36, 0xf5a531ec, 0xf7aeb45d, 0xf93474e9 }, ++ { 0xfc3c7559, 0xfd3e1962, 0xfebf9bc1, 0xff3fdbf2 }, ++ { 0x01bf3cab, 0x023ebd6b, 0x03bc8288, 0x06365a0f }, ++ { 0x06b4c1d2, 0x092afcc1, 0x09a8ad2c, 0x0b21093c }, ++ { 0x0f83d25e, 0x107c1074, 0x10f803d0, 0x11ef938d }, ++ { 0x136212e8, 0x14d390a4, 0x16beab25, 0x182dd7d5 }, ++ { 0x199c09bf, 0x1ed27f46, 0x1f4b2d3e, 0x21a502bc }, ++ { 0x23849e06, 0x25d9d3da, 0x273ef0ca, 0x28a326f6 }, ++ { 0x2a7cb5e4, 0x2d4019ba, 0x2e2b1e73, 0x2f8aec73 }, ++ { 0x30e9ddcc, 0x315ea828, 0x32bc75cf, 0x357587f0 }, ++ { 0x37b7de93, 0x3bc31ec6, 0x3c35b24a, 0x3d1a949b }, ++ { 0x3e713d15, 0x3ee347da, 0x4038e0bf, 0x411c2bae }, ++ { 0x418daf9a, 0x4270749e, 0x4516b0b0, 0x45876dcb }, ++ { 0x46d92246, 0x4e448a56, 0x4f9141c0, 0x50dd3e71 }, ++ { 0x5296c45b, 0x56738aac, 0x58961d02, 0x5b9010c1 }, ++ { 0x5c6913ae, 0x5cd577f2, 0x5dae0649, 0x5ef24aeb }, ++ { 0x60a199af, 0x6178ce9b, 0x61e44c97, 0x6326551c }, ++ { 0x65a86b29, 0x67bd7e12, 0x6827e41c, 0x68fc7925 }, ++ { 0x6966a836, 0x6a3acfa3, 0x6b78828a, 0x6df2017d }, ++ { 0x7068fdbb, 0x720c4495, 0x747f226b, 0x75b7a753 }, ++ { 0x7687a9e0, 0x77bf2d48, 0x795d98d4, 0x7a2c690b }, ++ { 0x7bc93fa8, 0x7c974690, 0x7f6653f3, 0x80333127 }, ++ { 0x81660244, 0x81cc2760, 0x829840e3, 0x83c9edd4 } ++}; ++#else 
+ static __u32 const twist_table[8] = { + 0x00000000, 0x3b6e20c8, 0x76dc4190, 0x4db26158, + 0xedb88320, 0xd6d6a3e8, 0x9b64c2b0, 0xa00ae278 }; ++#endif + + /* + * This function adds bytes into the entropy "pool". It does not +@@ -588,17 +676,14 @@ static __u32 const twist_table[8] = { + static void _mix_pool_bytes(struct entropy_store *r, const void *in, + int nbytes) + { +- unsigned long i, tap1, tap2, tap3, tap4, tap5; ++ unsigned long i, n, t1, t2, tap[5]; + int input_rotate; + int wordmask = r->poolinfo->poolwords - 1; + const char *bytes = in; + __u32 w; + +- tap1 = r->poolinfo->tap1; +- tap2 = r->poolinfo->tap2; +- tap3 = r->poolinfo->tap3; +- tap4 = r->poolinfo->tap4; +- tap5 = r->poolinfo->tap5; ++ for (n = 0; n < 5; n++) ++ tap[n] = r->poolinfo->tap[n]; + + input_rotate = r->input_rotate; + i = r->add_ptr; +@@ -610,14 +695,17 @@ static void _mix_pool_bytes(struct entropy_store *r, const void *in, + + /* XOR in the various taps */ + w ^= r->pool[i]; +- w ^= r->pool[(i + tap1) & wordmask]; +- w ^= r->pool[(i + tap2) & wordmask]; +- w ^= r->pool[(i + tap3) & wordmask]; +- w ^= r->pool[(i + tap4) & wordmask]; +- w ^= r->pool[(i + tap5) & wordmask]; ++ for (n = 0; n < 5; n++) ++ w ^= r->pool[(i + tap[n]) & wordmask]; + + /* Mix the result back in with a twist */ ++#ifdef CONFIG_HARDENED_RANDOM ++ t1 = rol32(w, 14) & 0x3F; // 0-63, 111111 ++ t2 = rol32(w, t1) & 0x3; // 0-3, 11 ++ r->pool[i] = (w >> 3) ^ twist_table[t1][t2]; ++#else + r->pool[i] = (w >> 3) ^ twist_table[w & 7]; ++#endif + + /* + * Normally, we add 7 bits of rotation to the pool. +@@ -655,7 +743,7 @@ struct fast_pool { + unsigned long last; + unsigned short reg_idx; + unsigned char count; +-}; ++} __randomize_layout; + + /* + * This is a fast mixing routine used by the interrupt randomness +@@ -750,7 +838,7 @@ static void credit_entropy_bits(struct entropy_store *r, int nbits) + /* The +2 corresponds to the /4 in the denominator */ + + do { +- unsigned int anfrac = min(pnfrac, pool_size/2); ++ __u64 anfrac = min(pnfrac, pool_size/2); + unsigned int add = + ((pool_size - entropy_count)*anfrac*3) >> s; + +@@ -1134,7 +1222,7 @@ static ssize_t extract_crng_user(void __user *buf, size_t nbytes) + + extract_crng(tmp); + i = min_t(int, nbytes, CHACHA_BLOCK_SIZE); +- if (copy_to_user(buf, tmp, i)) { ++ if (i > sizeof(tmp) || copy_to_user(buf, tmp, i)) { + ret = -EFAULT; + break; + } +@@ -1162,9 +1250,9 @@ static ssize_t extract_crng_user(void __user *buf, size_t nbytes) + struct timer_rand_state { + cycles_t last_time; + long last_delta, last_delta2; +-}; ++} __randomize_layout; + +-#define INIT_TIMER_RAND_STATE { INITIAL_JIFFIES, }; ++#define INIT_TIMER_RAND_STATE { .last_time = INITIAL_JIFFIES }; + + /* + * Add device- or boot-specific data to the input pool to help +@@ -1407,20 +1495,18 @@ static void xfer_secondary_pool(struct entropy_store *r, size_t nbytes) + + static void _xfer_secondary_pool(struct entropy_store *r, size_t nbytes) + { +- __u32 tmp[OUTPUT_POOL_WORDS]; +- + int bytes = nbytes; + + /* pull at least as much as a wakeup */ + bytes = max_t(int, bytes, random_read_wakeup_bits / 8); + /* but never more than the buffer size */ +- bytes = min_t(int, bytes, sizeof(tmp)); ++ bytes = min_t(int, bytes, sizeof(secondary_xfer_buffer)); + + trace_xfer_secondary_pool(r->name, bytes * 8, nbytes * 8, + ENTROPY_BITS(r), ENTROPY_BITS(r->pull)); +- bytes = extract_entropy(r->pull, tmp, bytes, ++ bytes = extract_entropy(r->pull, secondary_xfer_buffer, bytes, + random_read_wakeup_bits / 8, 0); +- mix_pool_bytes(r, tmp, 
bytes); ++ mix_pool_bytes(r, secondary_xfer_buffer, bytes); + credit_entropy_bits(r, bytes*8); + } + +@@ -1650,7 +1736,7 @@ static ssize_t extract_entropy_user(struct entropy_store *r, void __user *buf, + + extract_buf(r, tmp); + i = min_t(int, nbytes, EXTRACT_SIZE); +- if (copy_to_user(buf, tmp, i)) { ++ if (i > sizeof(tmp) || copy_to_user(buf, tmp, i)) { + ret = -EFAULT; + break; + } +@@ -2356,7 +2442,7 @@ struct batched_entropy { + }; + unsigned int position; + spinlock_t batch_lock; +-}; ++} __randomize_layout; + + /* + * Get a random word for internal kernel use only. The quality of the random +diff --git a/drivers/tty/Kconfig b/drivers/tty/Kconfig +index a312cb33a99b..b141c96e955d 100644 +--- a/drivers/tty/Kconfig ++++ b/drivers/tty/Kconfig +@@ -122,7 +122,6 @@ config UNIX98_PTYS + + config LEGACY_PTYS + bool "Legacy (BSD) PTY support" +- default y + ---help--- + A pseudo terminal (PTY) is a software device consisting of two + halves: a master and a slave. The slave device behaves identical to +diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c +index d9f54c7d94f2..f17f280faa05 100644 +--- a/drivers/tty/tty_io.c ++++ b/drivers/tty/tty_io.c +@@ -174,6 +174,7 @@ static void free_tty_struct(struct tty_struct *tty) + put_device(tty->dev); + kfree(tty->write_buf); + tty->magic = 0xDEADDEAD; ++ put_user_ns(tty->owner_user_ns); + kfree(tty); + } + +@@ -2183,11 +2184,19 @@ static int tty_fasync(int fd, struct file *filp, int on) + * FIXME: may race normal receive processing + */ + ++int tiocsti_restrict = IS_ENABLED(CONFIG_SECURITY_TIOCSTI_RESTRICT); ++ + static int tiocsti(struct tty_struct *tty, char __user *p) + { + char ch, mbz = 0; + struct tty_ldisc *ld; + ++ if (tiocsti_restrict && ++ !ns_capable(tty->owner_user_ns, CAP_SYS_ADMIN)) { ++ dev_warn_ratelimited(tty->dev, ++ "Denied TIOCSTI ioctl for non-privileged process\n"); ++ return -EPERM; ++ } + if ((current->signal->tty != tty) && !capable(CAP_SYS_ADMIN)) + return -EPERM; + if (get_user(ch, p)) +@@ -3011,6 +3020,7 @@ struct tty_struct *alloc_tty_struct(struct tty_driver *driver, int idx) + tty->index = idx; + tty_line_name(driver, idx, tty->name); + tty->dev = tty_get_device(tty); ++ tty->owner_user_ns = get_user_ns(current_user_ns()); + + return tty; + } +diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c +index 1d212f82c69b..bcaabb9e8ece 100644 +--- a/drivers/usb/core/hub.c ++++ b/drivers/usb/core/hub.c +@@ -45,6 +45,8 @@ + #define USB_TP_TRANSMISSION_DELAY 40 /* ns */ + #define USB_TP_TRANSMISSION_DELAY_MAX 65535 /* ns */ + ++extern int deny_new_usb; ++ + /* Protect struct usb_device->state and ->children members + * Note: Both are also protected by ->dev.sem, except that ->state can + * change to USB_STATE_NOTATTACHED even when the semaphore isn't held. 
*/ +@@ -5083,6 +5085,12 @@ static void hub_port_connect(struct usb_hub *hub, int port1, u16 portstatus, + goto done; + return; + } ++ ++ if (deny_new_usb) { ++ dev_err(&port_dev->dev, "denied insert of USB device on port %d\n", port1); ++ goto done; ++ } ++ + if (hub_is_superspeed(hub->hdev)) + unit_load = 150; + else +diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c +index f4d8df5e4714..25a388be019c 100644 +--- a/fs/debugfs/inode.c ++++ b/fs/debugfs/inode.c +@@ -36,6 +36,10 @@ static struct vfsmount *debugfs_mount; + static int debugfs_mount_count; + static bool debugfs_registered; + ++#ifdef CONFIG_HARDENED_SYSFS_RESTRICT ++extern int sysfs_restricted; ++#endif ++ + /* + * Don't allow access attributes to be changed whilst the kernel is locked down + * so that we can use the file mode as part of a heuristic to determine whether +@@ -555,6 +559,11 @@ struct dentry *debugfs_create_dir(const char *name, struct dentry *parent) + return failed_creating(dentry); + } + ++#ifdef CONFIG_HARDENED_SYSFS_RESTRICT ++ if (sysfs_restricted) ++ inode->i_mode = S_IFDIR | S_IRWXU; ++ else ++#endif + inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO; + inode->i_op = &debugfs_dir_inode_operations; + inode->i_fop = &simple_dir_operations; +diff --git a/fs/exec.c b/fs/exec.c +index 74d88dab98dd..26985e4cf9fe 100644 +--- a/fs/exec.c ++++ b/fs/exec.c +@@ -62,6 +62,7 @@ + #include <linux/oom.h> + #include <linux/compat.h> + #include <linux/vmalloc.h> ++#include <linux/random.h> + + #include <linux/uaccess.h> + #include <asm/mmu_context.h> +@@ -275,6 +276,8 @@ static int __bprm_mm_init(struct linux_binprm *bprm) + arch_bprm_mm_init(mm, vma); + up_write(&mm->mmap_sem); + bprm->p = vma->vm_end - sizeof(void *); ++ if (randomize_va_space) ++ bprm->p ^= get_random_int() & ~PAGE_MASK; + return 0; + err: + up_write(&mm->mmap_sem); +diff --git a/fs/namei.c b/fs/namei.c +index 70eb4bfeaebc..25d7adbd73ce 100644 +--- a/fs/namei.c ++++ b/fs/namei.c +@@ -124,6 +124,10 @@ + + #define EMBEDDED_NAME_MAX (PATH_MAX - offsetof(struct filename, iname)) + ++#ifdef CONFIG_HARDENED_FIFO ++extern int fifo_restrictions; ++#endif ++ + struct filename * + getname_flags(const char __user *filename, int flags, int *empty) + { +@@ -877,10 +881,10 @@ static inline void put_link(struct nameidata *nd) + path_put(&last->link); + } + +-int sysctl_protected_symlinks __read_mostly = 0; +-int sysctl_protected_hardlinks __read_mostly = 0; +-int sysctl_protected_fifos __read_mostly; +-int sysctl_protected_regular __read_mostly; ++int sysctl_protected_symlinks __read_mostly = 1; ++int sysctl_protected_hardlinks __read_mostly = 1; ++int sysctl_protected_fifos __read_mostly = 2; ++int sysctl_protected_regular __read_mostly = 2; + + /** + * may_follow_link - Check symlink following for unsafe situations +@@ -3195,6 +3199,32 @@ static int lookup_open(struct nameidata *nd, struct path *path, + return error; + } + ++/* ++ * Handles possibly restricted FIFO operations ++ * if the user doesn't own this directory. 
++ */ ++static int fifo_restricted(const struct dentry *dentry, ++ const struct vfsmount *mnt, ++ const struct dentry *dir, ++ const int flag, ++ const int acc_mode) { ++#ifdef CONFIG_HARDENED_FIFO ++ const struct cred *cred; ++ struct inode *inode, *dir_inode; ++ ++ cred = current_cred(); ++ inode = d_backing_inode(dentry); ++ dir_inode = d_backing_inode(dir); ++ ++ if (fifo_restrictions && S_ISFIFO(inode->i_mode) && ++ !(flag & O_EXCL) && (dir_inode->i_mode & S_ISVTX) && ++ !uid_eq(inode->i_uid, dir_inode->i_uid) && ++ !uid_eq(cred->fsuid, inode->i_uid)) ++ return -EACCES; ++#endif ++ return 0; ++} ++ + /* + * Handle the last step of open() + */ +@@ -3308,6 +3338,15 @@ static int do_last(struct nameidata *nd, + if (unlikely(error < 0)) + return error; + ++ /* ++ * Only check if O_CREAT is specified, all other checks need to go ++ * into may_open(). ++ */ ++ if (fifo_restricted(path.dentry, path.mnt, dir, open_flag, acc_mode)) { ++ path_to_nameidata(&path, nd); ++ return -EACCES; ++ } ++ + /* + * create/update audit record if it already exists. + */ +diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig +index e7dd07f47825..2b357b4355fd 100644 +--- a/fs/nfs/Kconfig ++++ b/fs/nfs/Kconfig +@@ -195,4 +195,3 @@ config NFS_DEBUG + bool + depends on NFS_FS && SUNRPC_DEBUG + select CRC32 +- default y +diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig +index 733881a6387b..c9fe82894423 100644 +--- a/fs/proc/Kconfig ++++ b/fs/proc/Kconfig +@@ -41,7 +41,6 @@ config PROC_KCORE + config PROC_VMCORE + bool "/proc/vmcore support" + depends on PROC_FS && CRASH_DUMP +- default y + help + Exports the dump image of crashed kernel in ELF format. + +diff --git a/fs/stat.c b/fs/stat.c +index c38e4c2e1221..6135fbaf7298 100644 +--- a/fs/stat.c ++++ b/fs/stat.c +@@ -40,8 +40,13 @@ void generic_fillattr(struct inode *inode, struct kstat *stat) + stat->gid = inode->i_gid; + stat->rdev = inode->i_rdev; + stat->size = i_size_read(inode); +- stat->atime = inode->i_atime; +- stat->mtime = inode->i_mtime; ++ if (is_sidechannel_device(inode) && !capable_noaudit(CAP_MKNOD)) { ++ stat->atime = inode->i_ctime; ++ stat->mtime = inode->i_ctime; ++ } else { ++ stat->atime = inode->i_atime; ++ stat->mtime = inode->i_mtime; ++ } + stat->ctime = inode->i_ctime; + stat->blksize = i_blocksize(inode); + stat->blocks = inode->i_blocks; +@@ -77,9 +82,14 @@ int vfs_getattr_nosec(const struct path *path, struct kstat *stat, + if (IS_AUTOMOUNT(inode)) + stat->attributes |= STATX_ATTR_AUTOMOUNT; + +- if (inode->i_op->getattr) +- return inode->i_op->getattr(path, stat, request_mask, +- query_flags); ++ if (inode->i_op->getattr) { ++ int retval = inode->i_op->getattr(path, stat, request_mask, query_flags); ++ if (!retval && is_sidechannel_device(inode) && !capable_noaudit(CAP_MKNOD)) { ++ stat->atime = stat->ctime; ++ stat->mtime = stat->ctime; ++ } ++ return retval; ++ } + + generic_fillattr(inode, stat); + return 0; +diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c +index aa85f2874a9f..9b85cc73f70f 100644 +--- a/fs/sysfs/dir.c ++++ b/fs/sysfs/dir.c +@@ -18,6 +18,10 @@ + + DEFINE_SPINLOCK(sysfs_symlink_target_lock); + ++#ifdef CONFIG_HARDENED_SYSFS_RESTRICT ++extern int sysfs_restricted; ++#endif ++ + void sysfs_warn_dup(struct kernfs_node *parent, const char *name) + { + char *buf; +@@ -40,12 +44,20 @@ void sysfs_warn_dup(struct kernfs_node *parent, const char *name) + int sysfs_create_dir_ns(struct kobject *kobj, const void *ns) + { + struct kernfs_node *parent, *kn; ++ const char* name; ++ umode_t mode; + kuid_t uid; + kgid_t gid; + ++#ifdef 
CONFIG_HARDENED_SYSFS_RESTRICT ++ const char *parent_name; ++#endif ++ + if (WARN_ON(!kobj)) + return -EINVAL; + ++ name = kobject_name(kobj); ++ + if (kobj->parent) + parent = kobj->parent->sd; + else +@@ -56,12 +68,30 @@ int sysfs_create_dir_ns(struct kobject *kobj, const void *ns) + + kobject_get_ownership(kobj, &uid, &gid); + +- kn = kernfs_create_dir_ns(parent, kobject_name(kobj), +- S_IRWXU | S_IRUGO | S_IXUGO, uid, gid, +- kobj, ns); ++#ifdef CONFIG_HARDENED_SYSFS_RESTRICT ++ if (sysfs_restricted) { ++ parent_name = parent->name; ++ mode = S_IRWXU; ++ ++ if ((!strcmp(parent_name, "") && (!strcmp(name, "devices") || ++ !strcmp(name, "fs"))) || ++ (!strcmp(parent_name, "devices") && !strcmp(name, "system")) || ++ (!strcmp(parent_name, "fs") && (!strcmp(name, "selinux") || ++ !strcmp(name, "fuse") || !strcmp(name, "ecryptfs"))) || ++ (!strcmp(parent_name, "system") && !strcmp(name, "cpu"))) ++ mode |= S_IRUGO | S_IXUGO; ++ } ++ else ++ mode = S_IRWXU | S_IRUGO | S_IXUGO; ++#else ++ mode = S_IRWXU | S_IRUGO | S_IXUGO; ++#endif ++ ++ kn = kernfs_create_dir_ns(parent, name, mode, uid, gid, kobj, ns); ++ + if (IS_ERR(kn)) { + if (PTR_ERR(kn) == -EEXIST) +- sysfs_warn_dup(parent, kobject_name(kobj)); ++ sysfs_warn_dup(parent, name); + return PTR_ERR(kn); + } + +diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c +index 37df7c9eedb1..97e21b2c2670 100644 +--- a/fs/userfaultfd.c ++++ b/fs/userfaultfd.c +@@ -28,7 +28,11 @@ + #include <linux/security.h> + #include <linux/hugetlb.h> + ++#ifdef CONFIG_USERFAULTFD_UNPRIVILEGED + int sysctl_unprivileged_userfaultfd __read_mostly = 1; ++#else ++int sysctl_unprivileged_userfaultfd __read_mostly; ++#endif + + static struct kmem_cache *userfaultfd_ctx_cachep __read_mostly; + +diff --git a/include/linux/cache.h b/include/linux/cache.h +index 750621e41d1c..e7157c18c62c 100644 +--- a/include/linux/cache.h ++++ b/include/linux/cache.h +@@ -31,6 +31,8 @@ + #define __ro_after_init __attribute__((__section__(".data..ro_after_init"))) + #endif + ++#define __read_only __ro_after_init ++ + #ifndef ____cacheline_aligned + #define ____cacheline_aligned __attribute__((__aligned__(SMP_CACHE_BYTES))) + #endif +diff --git a/include/linux/capability.h b/include/linux/capability.h +index ecce0f43c73a..e46306dd4401 100644 +--- a/include/linux/capability.h ++++ b/include/linux/capability.h +@@ -208,6 +208,7 @@ extern bool has_capability_noaudit(struct task_struct *t, int cap); + extern bool has_ns_capability_noaudit(struct task_struct *t, + struct user_namespace *ns, int cap); + extern bool capable(int cap); ++extern bool capable_noaudit(int cap); + extern bool ns_capable(struct user_namespace *ns, int cap); + extern bool ns_capable_noaudit(struct user_namespace *ns, int cap); + extern bool ns_capable_setid(struct user_namespace *ns, int cap); +@@ -234,6 +235,10 @@ static inline bool capable(int cap) + { + return true; + } ++static inline bool capable_noaudit(int cap) ++{ ++ return true; ++} + static inline bool ns_capable(struct user_namespace *ns, int cap) + { + return true; +diff --git a/include/linux/fs.h b/include/linux/fs.h +index 98e0349adb52..66d7049ddc2a 100644 +--- a/include/linux/fs.h ++++ b/include/linux/fs.h +@@ -3627,4 +3627,15 @@ static inline int inode_drain_writes(struct inode *inode) + return filemap_write_and_wait(inode->i_mapping); + } + ++extern int device_sidechannel_restrict; ++ ++static inline bool is_sidechannel_device(const struct inode *inode) ++{ ++ umode_t mode; ++ if (!device_sidechannel_restrict) ++ return false; ++ mode = inode->i_mode; ++ 
return ((S_ISCHR(mode) || S_ISBLK(mode)) && (mode & (S_IROTH | S_IWOTH))); ++} ++ + #endif /* _LINUX_FS_H */ +diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h +index a2d5d175d3c1..e91ab06119b0 100644 +--- a/include/linux/fsnotify.h ++++ b/include/linux/fsnotify.h +@@ -233,6 +233,9 @@ static inline void fsnotify_access(struct file *file) + struct inode *inode = file_inode(file); + __u32 mask = FS_ACCESS; + ++ if (is_sidechannel_device(inode)) ++ return; ++ + if (S_ISDIR(inode->i_mode)) + mask |= FS_ISDIR; + +@@ -249,6 +252,9 @@ static inline void fsnotify_modify(struct file *file) + struct inode *inode = file_inode(file); + __u32 mask = FS_MODIFY; + ++ if (is_sidechannel_device(inode)) ++ return; ++ + if (S_ISDIR(inode->i_mode)) + mask |= FS_ISDIR; + +diff --git a/include/linux/gfp.h b/include/linux/gfp.h +index e5b817cb86e7..7a266a15dc48 100644 +--- a/include/linux/gfp.h ++++ b/include/linux/gfp.h +@@ -553,9 +553,9 @@ extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order, + extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order); + extern unsigned long get_zeroed_page(gfp_t gfp_mask); + +-void *alloc_pages_exact(size_t size, gfp_t gfp_mask); ++void *alloc_pages_exact(size_t size, gfp_t gfp_mask) __attribute__((alloc_size(1))); + void free_pages_exact(void *virt, size_t size); +-void * __meminit alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask); ++void * __meminit alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask) __attribute__((alloc_size(2))); + + #define __get_free_page(gfp_mask) \ + __get_free_pages((gfp_mask), 0) +diff --git a/include/linux/highmem.h b/include/linux/highmem.h +index ea5cdbd8c2c3..805b84d6bbca 100644 +--- a/include/linux/highmem.h ++++ b/include/linux/highmem.h +@@ -215,6 +215,13 @@ static inline void clear_highpage(struct page *page) + kunmap_atomic(kaddr); + } + ++static inline void verify_zero_highpage(struct page *page) ++{ ++ void *kaddr = kmap_atomic(page); ++ BUG_ON(memchr_inv(kaddr, 0, PAGE_SIZE)); ++ kunmap_atomic(kaddr); ++} ++ + static inline void zero_user_segments(struct page *page, + unsigned start1, unsigned end1, + unsigned start2, unsigned end2) +diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h +index c5fe60ec6b84..bdfb16b84d23 100644 +--- a/include/linux/interrupt.h ++++ b/include/linux/interrupt.h +@@ -552,7 +552,7 @@ extern const char * const softirq_to_name[NR_SOFTIRQS]; + + struct softirq_action + { +- void (*action)(struct softirq_action *); ++ void (*action)(void); + }; + + asmlinkage void do_softirq(void); +@@ -567,7 +567,7 @@ static inline void do_softirq_own_stack(void) + } + #endif + +-extern void open_softirq(int nr, void (*action)(struct softirq_action *)); ++extern void __init open_softirq(int nr, void (*action)(void)); + extern void softirq_init(void); + extern void __raise_softirq_irqoff(unsigned int nr); + +diff --git a/include/linux/kobject_ns.h b/include/linux/kobject_ns.h +index 069aa2ebef90..cb9e3637a620 100644 +--- a/include/linux/kobject_ns.h ++++ b/include/linux/kobject_ns.h +@@ -45,7 +45,7 @@ struct kobj_ns_type_operations { + void (*drop_ns)(void *); + }; + +-int kobj_ns_type_register(const struct kobj_ns_type_operations *ops); ++int __init kobj_ns_type_register(const struct kobj_ns_type_operations *ops); + int kobj_ns_type_registered(enum kobj_ns_type type); + const struct kobj_ns_type_operations *kobj_child_ns_ops(struct kobject *parent); + const struct kobj_ns_type_operations *kobj_ns_ops(struct kobject *kobj); +diff --git a/include/linux/mm.h 
b/include/linux/mm.h +index cfaa8feecfe8..be23d358e0cd 100644 +--- a/include/linux/mm.h ++++ b/include/linux/mm.h +@@ -649,7 +649,7 @@ static inline int is_vmalloc_or_module_addr(const void *x) + } + #endif + +-extern void *kvmalloc_node(size_t size, gfp_t flags, int node); ++extern void *kvmalloc_node(size_t size, gfp_t flags, int node) __attribute__((alloc_size(1))); + static inline void *kvmalloc(size_t size, gfp_t flags) + { + return kvmalloc_node(size, flags, NUMA_NO_NODE); +diff --git a/include/linux/percpu.h b/include/linux/percpu.h +index 5e76af742c80..9a6c682ec127 100644 +--- a/include/linux/percpu.h ++++ b/include/linux/percpu.h +@@ -123,7 +123,7 @@ extern int __init pcpu_page_first_chunk(size_t reserved_size, + pcpu_fc_populate_pte_fn_t populate_pte_fn); + #endif + +-extern void __percpu *__alloc_reserved_percpu(size_t size, size_t align); ++extern void __percpu *__alloc_reserved_percpu(size_t size, size_t align) __attribute__((alloc_size(1))); + extern bool __is_kernel_percpu_address(unsigned long addr, unsigned long *can_addr); + extern bool is_kernel_percpu_address(unsigned long addr); + +@@ -131,8 +131,8 @@ extern bool is_kernel_percpu_address(unsigned long addr); + extern void __init setup_per_cpu_areas(void); + #endif + +-extern void __percpu *__alloc_percpu_gfp(size_t size, size_t align, gfp_t gfp); +-extern void __percpu *__alloc_percpu(size_t size, size_t align); ++extern void __percpu *__alloc_percpu_gfp(size_t size, size_t align, gfp_t gfp) __attribute__((alloc_size(1))); ++extern void __percpu *__alloc_percpu(size_t size, size_t align) __attribute__((alloc_size(1))); + extern void free_percpu(void __percpu *__pdata); + extern phys_addr_t per_cpu_ptr_to_phys(void *addr); + +diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h +index 6d4c22aee384..3e64a054b42e 100644 +--- a/include/linux/perf_event.h ++++ b/include/linux/perf_event.h +@@ -1283,6 +1283,11 @@ static inline int perf_is_paranoid(void) + return sysctl_perf_event_paranoid > -1; + } + ++static inline bool perf_paranoid_any(void) ++{ ++ return sysctl_perf_event_paranoid > 2; ++} ++ + static inline int perf_allow_kernel(struct perf_event_attr *attr) + { + if (sysctl_perf_event_paranoid > 1 && !capable(CAP_SYS_ADMIN)) +diff --git a/include/linux/slab.h b/include/linux/slab.h +index 877a95c6a2d2..19e7a853347d 100644 +--- a/include/linux/slab.h ++++ b/include/linux/slab.h +@@ -184,8 +184,8 @@ void memcg_deactivate_kmem_caches(struct mem_cgroup *, struct mem_cgroup *); + /* + * Common kmalloc functions provided by all allocators + */ +-void * __must_check __krealloc(const void *, size_t, gfp_t); +-void * __must_check krealloc(const void *, size_t, gfp_t); ++void * __must_check __krealloc(const void *, size_t, gfp_t) __attribute__((alloc_size(2))); ++void * __must_check krealloc(const void *, size_t, gfp_t) __attribute((alloc_size(2))); + void kfree(const void *); + void kzfree(const void *); + size_t __ksize(const void *); +@@ -390,7 +390,7 @@ static __always_inline unsigned int kmalloc_index(size_t size) + } + #endif /* !CONFIG_SLOB */ + +-void *__kmalloc(size_t size, gfp_t flags) __assume_kmalloc_alignment __malloc; ++void *__kmalloc(size_t size, gfp_t flags) __assume_kmalloc_alignment __malloc __attribute__((alloc_size(1))); + void *kmem_cache_alloc(struct kmem_cache *, gfp_t flags) __assume_slab_alignment __malloc; + void kmem_cache_free(struct kmem_cache *, void *); + +@@ -414,7 +414,7 @@ static __always_inline void kfree_bulk(size_t size, void **p) + } + + #ifdef CONFIG_NUMA +-void 
*__kmalloc_node(size_t size, gfp_t flags, int node) __assume_kmalloc_alignment __malloc; ++void *__kmalloc_node(size_t size, gfp_t flags, int node) __assume_kmalloc_alignment __malloc __attribute__((alloc_size(1))); + void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node) __assume_slab_alignment __malloc; + #else + static __always_inline void *__kmalloc_node(size_t size, gfp_t flags, int node) +@@ -539,7 +539,7 @@ static __always_inline void *kmalloc_large(size_t size, gfp_t flags) + * Try really hard to succeed the allocation but fail + * eventually. + */ +-static __always_inline void *kmalloc(size_t size, gfp_t flags) ++static __always_inline __attribute__((alloc_size(1))) void *kmalloc(size_t size, gfp_t flags) + { + if (__builtin_constant_p(size)) { + #ifndef CONFIG_SLOB +@@ -561,7 +561,7 @@ static __always_inline void *kmalloc(size_t size, gfp_t flags) + return __kmalloc(size, flags); + } + +-static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node) ++static __always_inline __attribute__((alloc_size(1))) void *kmalloc_node(size_t size, gfp_t flags, int node) + { + #ifndef CONFIG_SLOB + if (__builtin_constant_p(size) && +diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h +index d2153789bd9f..97da977d6060 100644 +--- a/include/linux/slub_def.h ++++ b/include/linux/slub_def.h +@@ -121,6 +121,11 @@ struct kmem_cache { + unsigned long random; + #endif + ++#ifdef CONFIG_SLAB_CANARY ++ unsigned long random_active; ++ unsigned long random_inactive; ++#endif ++ + #ifdef CONFIG_NUMA + /* + * Defragmentation by allocating from a remote node. +diff --git a/include/linux/string.h b/include/linux/string.h +index 02894e417565..d17a3a3eb77e 100644 +--- a/include/linux/string.h ++++ b/include/linux/string.h +@@ -270,10 +270,16 @@ void __read_overflow2(void) __compiletime_error("detected read beyond size of ob + void __read_overflow3(void) __compiletime_error("detected read beyond size of object passed as 3rd parameter"); + void __write_overflow(void) __compiletime_error("detected write beyond size of object passed as 1st parameter"); + ++#ifdef CONFIG_FORTIFY_SOURCE_STRICT_STRING ++#define __string_size(p) __builtin_object_size(p, 1) ++#else ++#define __string_size(p) __builtin_object_size(p, 0) ++#endif ++ + #if !defined(__NO_FORTIFY) && defined(__OPTIMIZE__) && defined(CONFIG_FORTIFY_SOURCE) + __FORTIFY_INLINE char *strncpy(char *p, const char *q, __kernel_size_t size) + { +- size_t p_size = __builtin_object_size(p, 0); ++ size_t p_size = __string_size(p); + if (__builtin_constant_p(size) && p_size < size) + __write_overflow(); + if (p_size < size) +@@ -283,7 +289,7 @@ __FORTIFY_INLINE char *strncpy(char *p, const char *q, __kernel_size_t size) + + __FORTIFY_INLINE char *strcat(char *p, const char *q) + { +- size_t p_size = __builtin_object_size(p, 0); ++ size_t p_size = __string_size(p); + if (p_size == (size_t)-1) + return __builtin_strcat(p, q); + if (strlcat(p, q, p_size) >= p_size) +@@ -294,7 +300,7 @@ __FORTIFY_INLINE char *strcat(char *p, const char *q) + __FORTIFY_INLINE __kernel_size_t strlen(const char *p) + { + __kernel_size_t ret; +- size_t p_size = __builtin_object_size(p, 0); ++ size_t p_size = __string_size(p); + + /* Work around gcc excess stack consumption issue */ + if (p_size == (size_t)-1 || +@@ -309,7 +315,7 @@ __FORTIFY_INLINE __kernel_size_t strlen(const char *p) + extern __kernel_size_t __real_strnlen(const char *, __kernel_size_t) __RENAME(strnlen); + __FORTIFY_INLINE __kernel_size_t strnlen(const char *p, __kernel_size_t 
maxlen) + { +- size_t p_size = __builtin_object_size(p, 0); ++ size_t p_size = __string_size(p); + __kernel_size_t ret = __real_strnlen(p, maxlen < p_size ? maxlen : p_size); + if (p_size <= ret && maxlen != ret) + fortify_panic(__func__); +@@ -321,8 +327,8 @@ extern size_t __real_strlcpy(char *, const char *, size_t) __RENAME(strlcpy); + __FORTIFY_INLINE size_t strlcpy(char *p, const char *q, size_t size) + { + size_t ret; +- size_t p_size = __builtin_object_size(p, 0); +- size_t q_size = __builtin_object_size(q, 0); ++ size_t p_size = __string_size(p); ++ size_t q_size = __string_size(q); + if (p_size == (size_t)-1 && q_size == (size_t)-1) + return __real_strlcpy(p, q, size); + ret = strlen(q); +@@ -342,8 +348,8 @@ __FORTIFY_INLINE size_t strlcpy(char *p, const char *q, size_t size) + __FORTIFY_INLINE char *strncat(char *p, const char *q, __kernel_size_t count) + { + size_t p_len, copy_len; +- size_t p_size = __builtin_object_size(p, 0); +- size_t q_size = __builtin_object_size(q, 0); ++ size_t p_size = __string_size(p); ++ size_t q_size = __string_size(q); + if (p_size == (size_t)-1 && q_size == (size_t)-1) + return __builtin_strncat(p, q, count); + p_len = strlen(p); +@@ -456,8 +462,8 @@ __FORTIFY_INLINE void *kmemdup(const void *p, size_t size, gfp_t gfp) + /* defined after fortified strlen and memcpy to reuse them */ + __FORTIFY_INLINE char *strcpy(char *p, const char *q) + { +- size_t p_size = __builtin_object_size(p, 0); +- size_t q_size = __builtin_object_size(q, 0); ++ size_t p_size = __string_size(p); ++ size_t q_size = __string_size(q); + if (p_size == (size_t)-1 && q_size == (size_t)-1) + return __builtin_strcpy(p, q); + memcpy(p, q, strlen(q) + 1); +diff --git a/include/linux/tty.h b/include/linux/tty.h +index bd5fe0e907e8..5d0444635d57 100644 +--- a/include/linux/tty.h ++++ b/include/linux/tty.h +@@ -14,6 +14,7 @@ + #include <uapi/linux/tty.h> + #include <linux/rwsem.h> + #include <linux/llist.h> ++#include <linux/user_namespace.h> + + + /* +@@ -338,6 +339,7 @@ struct tty_struct { + /* If the tty has a pending do_SAK, queue it here - akpm */ + struct work_struct SAK_work; + struct tty_port *port; ++ struct user_namespace *owner_user_ns; + } __randomize_layout; + + /* Each of a tty's open files has private_data pointing to tty_file_private */ +@@ -347,6 +349,8 @@ struct tty_file_private { + struct list_head list; + }; + ++extern int tiocsti_restrict; ++ + /* tty magic number */ + #define TTY_MAGIC 0x5401 + +diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h +index a4b241102771..f4d8265b7da4 100644 +--- a/include/linux/vmalloc.h ++++ b/include/linux/vmalloc.h +@@ -100,20 +100,20 @@ static inline void vmalloc_init(void) + static inline unsigned long vmalloc_nr_pages(void) { return 0; } + #endif + +-extern void *vmalloc(unsigned long size); +-extern void *vzalloc(unsigned long size); +-extern void *vmalloc_user(unsigned long size); +-extern void *vmalloc_node(unsigned long size, int node); +-extern void *vzalloc_node(unsigned long size, int node); +-extern void *vmalloc_user_node_flags(unsigned long size, int node, gfp_t flags); +-extern void *vmalloc_exec(unsigned long size); +-extern void *vmalloc_32(unsigned long size); +-extern void *vmalloc_32_user(unsigned long size); +-extern void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot); ++extern void *vmalloc(unsigned long size) __attribute__((alloc_size(1))); ++extern void *vzalloc(unsigned long size) __attribute__((alloc_size(1))); ++extern void *vmalloc_user(unsigned long size) 
__attribute__((alloc_size(1))); ++extern void *vmalloc_node(unsigned long size, int node) __attribute__((alloc_size(1))); ++extern void *vzalloc_node(unsigned long size, int node) __attribute__((alloc_size(1))); ++extern void *vmalloc_user_node_flags(unsigned long size, int node, gfp_t flags) __attribute__((alloc_size(1))); ++extern void *vmalloc_exec(unsigned long size) __attribute__((alloc_size(1))); ++extern void *vmalloc_32(unsigned long size) __attribute__((alloc_size(1))); ++extern void *vmalloc_32_user(unsigned long size) __attribute__((alloc_size(1))); ++extern void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot) __attribute__((alloc_size(1))); + extern void *__vmalloc_node_range(unsigned long size, unsigned long align, + unsigned long start, unsigned long end, gfp_t gfp_mask, + pgprot_t prot, unsigned long vm_flags, int node, +- const void *caller); ++ const void *caller) __attribute__((alloc_size(1))); + #ifndef CONFIG_MMU + extern void *__vmalloc_node_flags(unsigned long size, int node, gfp_t flags); + static inline void *__vmalloc_node_flags_caller(unsigned long size, int node, +diff --git a/include/uapi/linux/ip.h b/include/uapi/linux/ip.h +index e42d13b55cf3..3228bcfe7599 100644 +--- a/include/uapi/linux/ip.h ++++ b/include/uapi/linux/ip.h +@@ -66,7 +66,11 @@ + + #define IPVERSION 4 + #define MAXTTL 255 ++#ifdef CONFIG_HARDENED_STEALTH_NETWORKING ++#define IPDEFTTL 128 ++#else + #define IPDEFTTL 64 ++#endif + + #define IPOPT_OPTVAL 0 + #define IPOPT_OLEN 1 +diff --git a/init/Kconfig b/init/Kconfig +index 47d40f399000..50fd0c86a190 100644 +--- a/init/Kconfig ++++ b/init/Kconfig +@@ -362,6 +362,7 @@ config USELIB + config AUDIT + bool "Auditing support" + depends on NET ++ default y + help + Enable auditing infrastructure that can be used with another + kernel subsystem, such as SELinux (which requires this for +@@ -1103,6 +1104,22 @@ config USER_NS + + If unsure, say N. + ++config USER_NS_UNPRIVILEGED ++ bool "Allow unprivileged users to create namespaces" ++ depends on USER_NS ++ default n ++ help ++ When disabled, unprivileged users will not be able to create ++ new namespaces. Allowing users to create their own namespaces ++ has been part of several recent local privilege escalation ++ exploits, so if you need user namespaces but are ++ paranoid^Wsecurity-conscious you want to disable this. ++ ++ This setting can be overridden at runtime via the ++ kernel.unprivileged_userns_clone sysctl. ++ ++ If unsure, say N. ++ + config PID_NS + bool "PID Namespaces" + default y +@@ -1506,8 +1523,7 @@ config SHMEM + which may be appropriate on small systems without swap. + + config AIO +- bool "Enable AIO support" if EXPERT +- default y ++ bool "Enable AIO support" + help + This option enables POSIX asynchronous I/O which may by used + by some high performance threaded applications. Disabling +@@ -1619,6 +1635,23 @@ config USERFAULTFD + Enable the userfaultfd() system call that allows to intercept and + handle page faults in userland. + ++config USERFAULTFD_UNPRIVILEGED ++ bool "Allow unprivileged users to use the userfaultfd syscall" ++ depends on USERFAULTFD ++ default n ++ help ++ When disabled, unprivileged users will not be able to use the userfaultfd ++ syscall. Userfaultfd provides attackers with a way to stall a kernel ++ thread in the middle of memory accesses from userspace by initiating an ++ access on an unmapped page. To avoid various heap grooming and heap ++ spraying techniques for exploiting use-after-free flaws, this should be ++ disabled by default. 
++ ++ This setting can be overridden at runtime via the ++ vm.unprivileged_userfaultfd sysctl. ++ ++ If unsure, say N. ++ + config ARCH_HAS_MEMBARRIER_CALLBACKS + bool + +@@ -1731,7 +1764,7 @@ config VM_EVENT_COUNTERS + + config SLUB_DEBUG + default y +- bool "Enable SLUB debugging support" if EXPERT ++ bool "Enable SLUB debugging support" + depends on SLUB && SYSFS + help + SLUB has extensive debug support features. Disabling these can +@@ -1755,7 +1788,6 @@ config SLUB_MEMCG_SYSFS_ON + + config COMPAT_BRK + bool "Disable heap randomization" +- default y + help + Randomizing heap placement makes heap exploits harder, but it + also breaks ancient binaries (including anything libc5 based). +@@ -1802,7 +1834,6 @@ endchoice + + config SLAB_MERGE_DEFAULT + bool "Allow slab caches to be merged" +- default y + help + For reduced kernel memory fragmentation, slab caches can be + merged when they share the same size and other characteristics. +@@ -1815,9 +1846,9 @@ config SLAB_MERGE_DEFAULT + command line. + + config SLAB_FREELIST_RANDOM +- default n + depends on SLAB || SLUB + bool "SLAB freelist randomization" ++ default y + help + Randomizes the freelist order used on creating new pages. This + security feature reduces the predictability of the kernel slab +@@ -1826,12 +1857,30 @@ config SLAB_FREELIST_RANDOM + config SLAB_FREELIST_HARDENED + bool "Harden slab freelist metadata" + depends on SLUB ++ default y + help + Many kernel heap attacks try to target slab cache metadata and + other infrastructure. This options makes minor performance + sacrifices to harden the kernel slab allocator against common + freelist exploit methods. + ++config SLAB_CANARY ++ depends on SLUB ++ depends on !SLAB_MERGE_DEFAULT ++ bool "SLAB canaries" ++ default y ++ help ++ Place canaries at the end of kernel slab allocations, sacrificing ++ some performance and memory usage for security. ++ ++ Canaries can detect some forms of heap corruption when allocations ++ are freed and as part of the HARDENED_USERCOPY feature. It provides ++ basic use-after-free detection for HARDENED_USERCOPY. ++ ++ Canaries absorb small overflows (rendering them harmless), mitigate ++ non-NUL terminated C string overflows on 64-bit via a guaranteed zero ++ byte and provide basic double-free detection. ++ + config SHUFFLE_PAGE_ALLOCATOR + bool "Page allocator randomization" + default SLAB_FREELIST_RANDOM && ACPI_NUMA +diff --git a/kernel/audit.c b/kernel/audit.c +index f971cd636426..b93288621224 100644 +--- a/kernel/audit.c ++++ b/kernel/audit.c +@@ -1630,6 +1630,9 @@ static int __init audit_enable(char *str) + + if (audit_default == AUDIT_OFF) + audit_initialized = AUDIT_DISABLED; ++ else if (!audit_ever_enabled) ++ audit_initialized = AUDIT_UNINITIALIZED; ++ + if (audit_set_enabled(audit_default)) + pr_err("audit: error setting audit state (%d)\n", + audit_default); +diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c +index af6b738cf435..cae1a03aafeb 100644 +--- a/kernel/bpf/core.c ++++ b/kernel/bpf/core.c +@@ -521,7 +521,7 @@ void bpf_prog_kallsyms_del_all(struct bpf_prog *fp) + #ifdef CONFIG_BPF_JIT + /* All BPF JIT sysctl knobs here. 
*/ + int bpf_jit_enable __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_ALWAYS_ON); +-int bpf_jit_harden __read_mostly; ++int bpf_jit_harden __read_mostly = 2; + int bpf_jit_kallsyms __read_mostly; + long bpf_jit_limit __read_mostly; + +diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c +index e3461ec59570..a12e67d504ee 100644 +--- a/kernel/bpf/syscall.c ++++ b/kernel/bpf/syscall.c +@@ -41,7 +41,7 @@ static DEFINE_SPINLOCK(prog_idr_lock); + static DEFINE_IDR(map_idr); + static DEFINE_SPINLOCK(map_idr_lock); + +-int sysctl_unprivileged_bpf_disabled __read_mostly; ++int sysctl_unprivileged_bpf_disabled __read_mostly = 1; + + static const struct bpf_map_ops * const bpf_map_types[] = { + #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) +diff --git a/kernel/capability.c b/kernel/capability.c +index 1444f3954d75..8cc9dd7992f2 100644 +--- a/kernel/capability.c ++++ b/kernel/capability.c +@@ -449,6 +449,12 @@ bool capable(int cap) + return ns_capable(&init_user_ns, cap); + } + EXPORT_SYMBOL(capable); ++ ++bool capable_noaudit(int cap) ++{ ++ return ns_capable_noaudit(&init_user_ns, cap); ++} ++EXPORT_SYMBOL(capable_noaudit); + #endif /* CONFIG_MULTIUSER */ + + /** +diff --git a/kernel/events/core.c b/kernel/events/core.c +index fdb7f7ef380c..7f310a91abff 100644 +--- a/kernel/events/core.c ++++ b/kernel/events/core.c +@@ -398,8 +398,13 @@ static cpumask_var_t perf_online_mask; + * 0 - disallow raw tracepoint access for unpriv + * 1 - disallow cpu events for unpriv + * 2 - disallow kernel profiling for unpriv ++ * 3 - disallow all unpriv perf event use + */ ++#ifdef CONFIG_SECURITY_PERF_EVENTS_RESTRICT ++int sysctl_perf_event_paranoid __read_mostly = 3; ++#else + int sysctl_perf_event_paranoid __read_mostly = 2; ++#endif + + /* Minimum for 512 kiB + 1 user control page */ + int sysctl_perf_event_mlock __read_mostly = 512 + (PAGE_SIZE / 1024); /* 'free' kiB per user */ +@@ -11179,6 +11184,9 @@ SYSCALL_DEFINE5(perf_event_open, + if (flags & ~PERF_FLAG_ALL) + return -EINVAL; + ++ if (perf_paranoid_any() && !capable(CAP_SYS_ADMIN)) ++ return -EACCES; ++ + /* Do we allow access to perf_event_open(2) ? */ + err = security_perf_event_open(&attr, PERF_SECURITY_OPEN); + if (err) +diff --git a/kernel/fork.c b/kernel/fork.c +index 080809560072..1cb7b827b57b 100644 +--- a/kernel/fork.c ++++ b/kernel/fork.c +@@ -106,6 +106,11 @@ + + #define CREATE_TRACE_POINTS + #include <trace/events/task.h> ++#ifdef CONFIG_USER_NS ++extern int unprivileged_userns_clone; ++#else ++#define unprivileged_userns_clone 0 ++#endif + + /* + * Minimum number of threads to boot the kernel +@@ -1843,6 +1848,10 @@ static __latent_entropy struct task_struct *copy_process( + if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS)) + return ERR_PTR(-EINVAL); + ++ if ((clone_flags & CLONE_NEWUSER) && !unprivileged_userns_clone) ++ if (!capable(CAP_SYS_ADMIN)) ++ return ERR_PTR(-EPERM); ++ + /* + * Thread groups must share signals as well, and detached threads + * can only be started up within the thread group. 
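The copy_process() gate above (and the matching ksys_unshare() gate in the next hunk) surfaces to userspace as EPERM from clone(2)/unshare(2) with CLONE_NEWUSER whenever kernel.unprivileged_userns_clone is 0 and the caller lacks CAP_SYS_ADMIN. A minimal userspace probe of that behavior (illustrative sketch, not part of the patch):

/* probe_userns.c - build with: cc -o probe_userns probe_userns.c */
#define _GNU_SOURCE
#include <sched.h>
#include <errno.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	if (unshare(CLONE_NEWUSER) == 0)
		puts("user namespace created (sysctl is 1, or caller has CAP_SYS_ADMIN)");
	else if (errno == EPERM)
		puts("EPERM: unprivileged_userns_clone=0 blocks unprivileged CLONE_NEWUSER");
	else
		printf("unshare: %s\n", strerror(errno));
	return 0;
}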
+@@ -2923,6 +2932,12 @@ int ksys_unshare(unsigned long unshare_flags) + if (unshare_flags & CLONE_NEWNS) + unshare_flags |= CLONE_FS; + ++ if ((unshare_flags & CLONE_NEWUSER) && !unprivileged_userns_clone) { ++ err = -EPERM; ++ if (!capable(CAP_SYS_ADMIN)) ++ goto bad_unshare_out; ++ } ++ + err = check_unshare_flags(unshare_flags); + if (err) + goto bad_unshare_out; +diff --git a/kernel/kmod.c b/kernel/kmod.c +index bc6addd9152b..008be43f6cdd 100644 +--- a/kernel/kmod.c ++++ b/kernel/kmod.c +@@ -149,6 +149,13 @@ int __request_module(bool wait, const char *fmt, ...) + if (ret) + return ret; + ++#ifdef CONFIG_HARDENED_MODULE_LOAD ++ if (uid_eq(current_uid(), GLOBAL_ROOT_UID)) { ++ printk(KERN_ALERT "denied attempt to auto-load module %.64s\n", module_name); ++ return -EPERM; ++ } ++#endif ++ + if (atomic_dec_if_positive(&kmod_concurrent_max) < 0) { + pr_warn_ratelimited("request_module: kmod_concurrent_max (%u) close to 0 (max_modprobes: %u), for module %s, throttling...", + atomic_read(&kmod_concurrent_max), +diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c +index d65f2d5ab694..145e3c62c380 100644 +--- a/kernel/power/snapshot.c ++++ b/kernel/power/snapshot.c +@@ -1150,6 +1150,9 @@ void clear_free_pages(void) + struct memory_bitmap *bm = free_pages_map; + unsigned long pfn; + ++ if (!IS_ENABLED(CONFIG_PAGE_POISONING_ZERO) && !want_init_on_free()) ++ return; ++ + if (WARN_ON(!(free_pages_map))) + return; + +diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c +index 477b4eb44af5..db28cc3fd301 100644 +--- a/kernel/rcu/tiny.c ++++ b/kernel/rcu/tiny.c +@@ -74,7 +74,7 @@ void rcu_sched_clock_irq(int user) + } + + /* Invoke the RCU callbacks whose grace period has elapsed. */ +-static __latent_entropy void rcu_process_callbacks(struct softirq_action *unused) ++static __latent_entropy void rcu_process_callbacks(void) + { + struct rcu_head *next, *list; + unsigned long flags; +diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c +index 6145e08a1407..684f5a706abc 100644 +--- a/kernel/rcu/tree.c ++++ b/kernel/rcu/tree.c +@@ -2413,7 +2413,7 @@ static __latent_entropy void rcu_core(void) + trace_rcu_utilization(TPS("End RCU core")); + } + +-static void rcu_core_si(struct softirq_action *h) ++static void rcu_core_si(void) + { + rcu_core(); + } +diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c +index b0ee5eedeccd..cd89f2d34a9d 100644 +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -10209,7 +10209,7 @@ int newidle_balance(struct rq *this_rq, struct rq_flags *rf) + * run_rebalance_domains is triggered when needed from the scheduler tick. + * Also triggered for nohz idle balancing (with nohz_balancing_kick set). + */ +-static __latent_entropy void run_rebalance_domains(struct softirq_action *h) ++static __latent_entropy void run_rebalance_domains(void) + { + struct rq *this_rq = this_rq(); + enum cpu_idle_type idle = this_rq->idle_balance ? 
+diff --git a/kernel/softirq.c b/kernel/softirq.c +index 0427a86743a4..5e6a9b4ccb41 100644 +--- a/kernel/softirq.c ++++ b/kernel/softirq.c +@@ -52,7 +52,7 @@ DEFINE_PER_CPU_ALIGNED(irq_cpustat_t, irq_stat); + EXPORT_PER_CPU_SYMBOL(irq_stat); + #endif + +-static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp; ++static struct softirq_action softirq_vec[NR_SOFTIRQS] __ro_after_init __aligned(PAGE_SIZE); + + DEFINE_PER_CPU(struct task_struct *, ksoftirqd); + +@@ -289,7 +289,7 @@ asmlinkage __visible void __softirq_entry __do_softirq(void) + kstat_incr_softirqs_this_cpu(vec_nr); + + trace_softirq_entry(vec_nr); +- h->action(h); ++ h->action(); + trace_softirq_exit(vec_nr); + if (unlikely(prev_count != preempt_count())) { + pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n", +@@ -452,7 +452,7 @@ void __raise_softirq_irqoff(unsigned int nr) + or_softirq_pending(1UL << nr); + } + +-void open_softirq(int nr, void (*action)(struct softirq_action *)) ++void __init open_softirq(int nr, void (*action)(void)) + { + softirq_vec[nr].action = action; + } +@@ -498,8 +498,7 @@ void __tasklet_hi_schedule(struct tasklet_struct *t) + } + EXPORT_SYMBOL(__tasklet_hi_schedule); + +-static void tasklet_action_common(struct softirq_action *a, +- struct tasklet_head *tl_head, ++static void tasklet_action_common(struct tasklet_head *tl_head, + unsigned int softirq_nr) + { + struct tasklet_struct *list; +@@ -536,14 +535,14 @@ static void tasklet_action_common(struct softirq_action *a, + } + } + +-static __latent_entropy void tasklet_action(struct softirq_action *a) ++static __latent_entropy void tasklet_action(void) + { +- tasklet_action_common(a, this_cpu_ptr(&tasklet_vec), TASKLET_SOFTIRQ); ++ tasklet_action_common(this_cpu_ptr(&tasklet_vec), TASKLET_SOFTIRQ); + } + +-static __latent_entropy void tasklet_hi_action(struct softirq_action *a) ++static __latent_entropy void tasklet_hi_action(void) + { +- tasklet_action_common(a, this_cpu_ptr(&tasklet_hi_vec), HI_SOFTIRQ); ++ tasklet_action_common(this_cpu_ptr(&tasklet_hi_vec), HI_SOFTIRQ); + } + + void tasklet_init(struct tasklet_struct *t, +diff --git a/kernel/sysctl.c b/kernel/sysctl.c +index 70665934d53e..9b2fc21fb844 100644 +--- a/kernel/sysctl.c ++++ b/kernel/sysctl.c +@@ -68,6 +68,7 @@ + #include <linux/bpf.h> + #include <linux/mount.h> + #include <linux/userfaultfd_k.h> ++#include <linux/tty.h> + + #include "../lib/kstrtox.h" + +@@ -104,12 +105,25 @@ + #if defined(CONFIG_SYSCTL) + + /* External variables not in a header file. 
*/ ++#if IS_ENABLED(CONFIG_USB) ++int deny_new_usb __read_mostly = 0; ++EXPORT_SYMBOL(deny_new_usb); ++#endif ++#ifdef CONFIG_HARDENED_SYSFS_RESTRICT ++int __read_mostly sysfs_restricted = 1; ++#endif ++#ifdef CONFIG_HARDENED_FIFO ++int __read_mostly fifo_restrictions = 1; ++#endif + extern int suid_dumpable; + #ifdef CONFIG_COREDUMP + extern int core_uses_pid; + extern char core_pattern[]; + extern unsigned int core_pipe_limit; + #endif ++#ifdef CONFIG_USER_NS ++extern int unprivileged_userns_clone; ++#endif + extern int pid_max; + extern int pid_max_min, pid_max_max; + extern int percpu_pagelist_fraction; +@@ -121,32 +135,32 @@ extern int sysctl_nr_trim_pages; + + /* Constants used for minimum and maximum */ + #ifdef CONFIG_LOCKUP_DETECTOR +-static int sixty = 60; ++static int sixty __read_only = 60; + #endif + +-static int __maybe_unused neg_one = -1; +-static int __maybe_unused two = 2; +-static int __maybe_unused four = 4; +-static unsigned long zero_ul; +-static unsigned long one_ul = 1; +-static unsigned long long_max = LONG_MAX; +-static int one_hundred = 100; +-static int one_thousand = 1000; ++static int __maybe_unused neg_one __read_only = -1; ++static int __maybe_unused two __read_only = 2; ++static int __maybe_unused four __read_only = 4; ++static unsigned long zero_ul __read_only; ++static unsigned long one_ul __read_only = 1; ++static unsigned long long_max __read_only = LONG_MAX; ++static int one_hundred __read_only = 100; ++static int one_thousand __read_only = 1000; + #ifdef CONFIG_PRINTK +-static int ten_thousand = 10000; ++static int ten_thousand __read_only = 10000; + #endif + #ifdef CONFIG_PERF_EVENTS +-static int six_hundred_forty_kb = 640 * 1024; ++static int six_hundred_forty_kb __read_only = 640 * 1024; + #endif + + /* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */ +-static unsigned long dirty_bytes_min = 2 * PAGE_SIZE; ++static unsigned long dirty_bytes_min __read_only = 2 * PAGE_SIZE; + + /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ +-static int maxolduid = 65535; +-static int minolduid; ++static int maxolduid __read_only = 65535; ++static int minolduid __read_only; + +-static int ngroups_max = NGROUPS_MAX; ++static int ngroups_max __read_only = NGROUPS_MAX; + static const int cap_last_cap = CAP_LAST_CAP; + + /* +@@ -154,9 +168,12 @@ static const int cap_last_cap = CAP_LAST_CAP; + * and hung_task_check_interval_secs + */ + #ifdef CONFIG_DETECT_HUNG_TASK +-static unsigned long hung_task_timeout_max = (LONG_MAX/HZ); ++static unsigned long hung_task_timeout_max __read_only = (LONG_MAX/HZ); + #endif + ++int device_sidechannel_restrict __read_mostly = 1; ++EXPORT_SYMBOL(device_sidechannel_restrict); ++ + #ifdef CONFIG_INOTIFY_USER + #include <linux/inotify.h> + #endif +@@ -301,19 +318,19 @@ static struct ctl_table sysctl_base_table[] = { + }; + + #ifdef CONFIG_SCHED_DEBUG +-static int min_sched_granularity_ns = 100000; /* 100 usecs */ +-static int max_sched_granularity_ns = NSEC_PER_SEC; /* 1 second */ +-static int min_wakeup_granularity_ns; /* 0 usecs */ +-static int max_wakeup_granularity_ns = NSEC_PER_SEC; /* 1 second */ ++static int min_sched_granularity_ns __read_only = 100000; /* 100 usecs */ ++static int max_sched_granularity_ns __read_only = NSEC_PER_SEC; /* 1 second */ ++static int min_wakeup_granularity_ns __read_only; /* 0 usecs */ ++static int max_wakeup_granularity_ns __read_only = NSEC_PER_SEC; /* 1 second */ + #ifdef CONFIG_SMP +-static int min_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE; 
+-static int max_sched_tunable_scaling = SCHED_TUNABLESCALING_END-1; ++static int min_sched_tunable_scaling __read_only = SCHED_TUNABLESCALING_NONE; ++static int max_sched_tunable_scaling __read_only = SCHED_TUNABLESCALING_END-1; + #endif /* CONFIG_SMP */ + #endif /* CONFIG_SCHED_DEBUG */ + + #ifdef CONFIG_COMPACTION +-static int min_extfrag_threshold; +-static int max_extfrag_threshold = 1000; ++static int min_extfrag_threshold __read_only; ++static int max_extfrag_threshold __read_only = 1000; + #endif + + static struct ctl_table kern_table[] = { +@@ -546,6 +563,15 @@ static struct ctl_table kern_table[] = { + .proc_handler = proc_dointvec, + }, + #endif ++#ifdef CONFIG_USER_NS ++ { ++ .procname = "unprivileged_userns_clone", ++ .data = &unprivileged_userns_clone, ++ .maxlen = sizeof(int), ++ .mode = 0644, ++ .proc_handler = proc_dointvec, ++ }, ++#endif + #ifdef CONFIG_PROC_SYSCTL + { + .procname = "tainted", +@@ -901,6 +927,59 @@ static struct ctl_table kern_table[] = { + .extra1 = SYSCTL_ZERO, + .extra2 = &two, + }, ++#endif ++#if defined CONFIG_TTY ++ { ++ .procname = "tiocsti_restrict", ++ .data = &tiocsti_restrict, ++ .maxlen = sizeof(int), ++ .mode = 0644, ++ .proc_handler = proc_dointvec_minmax_sysadmin, ++ .extra1 = SYSCTL_ZERO, ++ .extra2 = SYSCTL_ONE, ++ }, ++#endif ++ { ++ .procname = "device_sidechannel_restrict", ++ .data = &device_sidechannel_restrict, ++ .maxlen = sizeof(int), ++ .mode = 0644, ++ .proc_handler = proc_dointvec_minmax_sysadmin, ++ .extra1 = SYSCTL_ZERO, ++ .extra2 = SYSCTL_ONE, ++ }, ++#if IS_ENABLED(CONFIG_USB) ++ { ++ .procname = "deny_new_usb", ++ .data = &deny_new_usb, ++ .maxlen = sizeof(int), ++ .mode = 0644, ++ .proc_handler = proc_dointvec_minmax_sysadmin, ++ .extra1 = SYSCTL_ZERO, ++ .extra2 = SYSCTL_ONE, ++ }, ++#endif ++#ifdef CONFIG_HARDENED_SYSFS_RESTRICT ++ { ++ .procname = "sysfs_restricted", ++ .data = &sysfs_restricted, ++ .maxlen = sizeof(int), ++ .mode = 0644, ++ .proc_handler = proc_dointvec_minmax_sysadmin, ++ .extra1 = SYSCTL_ZERO, ++ .extra2 = SYSCTL_ONE, ++ }, ++#endif ++#ifdef CONFIG_HARDENED_FIFO ++ { ++ .procname = "fifo_restrictions", ++ .data = &fifo_restrictions, ++ .maxlen = sizeof(int), ++ .mode = 0644, ++ .proc_handler = proc_dointvec_minmax_sysadmin, ++ .extra1 = SYSCTL_ZERO, ++ .extra2 = SYSCTL_ONE, ++ }, + #endif + { + .procname = "ngroups_max", +diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c +index 8de90ea31280..559417d71602 100644 +--- a/kernel/time/hrtimer.c ++++ b/kernel/time/hrtimer.c +@@ -1583,7 +1583,7 @@ static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now, + } + } + +-static __latent_entropy void hrtimer_run_softirq(struct softirq_action *h) ++static __latent_entropy void hrtimer_run_softirq(void) + { + struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); + unsigned long flags; +diff --git a/kernel/time/timer.c b/kernel/time/timer.c +index 4820823515e9..1a61e5aa87ae 100644 +--- a/kernel/time/timer.c ++++ b/kernel/time/timer.c +@@ -1779,7 +1779,7 @@ static inline void __run_timers(struct timer_base *base) + /* + * This function runs timers and the timer-tq in bottom half context. 
+ */ +-static __latent_entropy void run_timer_softirq(struct softirq_action *h) ++static __latent_entropy void run_timer_softirq(void) + { + struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); + +diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c +index 8eadadc478f9..c36ecd19562c 100644 +--- a/kernel/user_namespace.c ++++ b/kernel/user_namespace.c +@@ -21,6 +21,13 @@ + #include <linux/bsearch.h> + #include <linux/sort.h> + ++/* sysctl */ ++#ifdef CONFIG_USER_NS_UNPRIVILEGED ++int unprivileged_userns_clone = 1; ++#else ++int unprivileged_userns_clone; ++#endif ++ + static struct kmem_cache *user_ns_cachep __read_mostly; + static DEFINE_MUTEX(userns_state_mutex); + +diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug +index 5ffe144c9794..7d2a5391d3a2 100644 +--- a/lib/Kconfig.debug ++++ b/lib/Kconfig.debug +@@ -347,6 +347,9 @@ config SECTION_MISMATCH_WARN_ONLY + + If unsure, say Y. + ++config DEBUG_WRITABLE_FUNCTION_POINTERS_VERBOSE ++ bool "Enable verbose reporting of writable function pointers" ++ + # + # Select this config option from the architecture Kconfig, if it + # is preferred to always offer frame pointers as a config +@@ -798,6 +801,7 @@ menu "Debug Oops, Lockups and Hangs" + + config PANIC_ON_OOPS + bool "Panic on Oops" ++ default y + help + Say Y here to enable the kernel to panic when it oopses. This + has the same effect as setting oops=panic on the kernel command +@@ -807,7 +811,7 @@ config PANIC_ON_OOPS + anything erroneous after an oops which could result in data + corruption or other issues. + +- Say N if unsure. ++ Say Y if unsure. + + config PANIC_ON_OOPS_VALUE + int +@@ -1317,6 +1321,7 @@ menu "Debug kernel data structures" + config DEBUG_LIST + bool "Debug linked list manipulation" + depends on DEBUG_KERNEL || BUG_ON_DATA_CORRUPTION ++ default y + help + Enable this to turn on extended checks in the linked-list + walking routines. 
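The user_namespace.c hunk above wires the CONFIG_USER_NS_UNPRIVILEGED choice to the runtime default, and the kern_table entry registered earlier exposes it under /proc/sys/kernel. A small sketch that reads the effective value (the path follows from the ctl_table entry; example code, not part of the patch):

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/kernel/unprivileged_userns_clone", "r");
	int val;

	if (!f) {
		perror("open (kernel built without CONFIG_USER_NS?)");
		return 1;
	}
	if (fscanf(f, "%d", &val) == 1)
		printf("unprivileged_userns_clone = %d\n", val);
	fclose(f);
	return 0;
}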
+@@ -1356,6 +1361,7 @@ config DEBUG_NOTIFIERS + config BUG_ON_DATA_CORRUPTION + bool "Trigger a BUG when data corruption is detected" + select DEBUG_LIST ++ default y + help + Select this option if the kernel should BUG when it encounters + data corruption in kernel memory structures when they get checked +@@ -1511,6 +1517,7 @@ config STRICT_DEVMEM + config IO_STRICT_DEVMEM + bool "Filter I/O access to /dev/mem" + depends on STRICT_DEVMEM ++ default y + help + If this option is disabled, you allow userspace (root) access to all + io-memory regardless of whether a driver is actively using that +diff --git a/lib/irq_poll.c b/lib/irq_poll.c +index 2f17b488d58e..b6e7996a0058 100644 +--- a/lib/irq_poll.c ++++ b/lib/irq_poll.c +@@ -75,7 +75,7 @@ void irq_poll_complete(struct irq_poll *iop) + } + EXPORT_SYMBOL(irq_poll_complete); + +-static void __latent_entropy irq_poll_softirq(struct softirq_action *h) ++static void __latent_entropy irq_poll_softirq(void) + { + struct list_head *list = this_cpu_ptr(&blk_cpu_iopoll); + int rearm = 0, budget = irq_poll_budget; +diff --git a/lib/kobject.c b/lib/kobject.c +index 83198cb37d8d..4a053b7aef42 100644 +--- a/lib/kobject.c ++++ b/lib/kobject.c +@@ -1009,9 +1009,9 @@ EXPORT_SYMBOL_GPL(kset_create_and_add); + + + static DEFINE_SPINLOCK(kobj_ns_type_lock); +-static const struct kobj_ns_type_operations *kobj_ns_ops_tbl[KOBJ_NS_TYPES]; ++static const struct kobj_ns_type_operations *kobj_ns_ops_tbl[KOBJ_NS_TYPES] __ro_after_init; + +-int kobj_ns_type_register(const struct kobj_ns_type_operations *ops) ++int __init kobj_ns_type_register(const struct kobj_ns_type_operations *ops) + { + enum kobj_ns_type type = ops->type; + int error; +diff --git a/lib/nlattr.c b/lib/nlattr.c +index cace9b307781..39ba1387045d 100644 +--- a/lib/nlattr.c ++++ b/lib/nlattr.c +@@ -571,6 +571,8 @@ int nla_memcpy(void *dest, const struct nlattr *src, int count) + { + int minlen = min_t(int, count, nla_len(src)); + ++ BUG_ON(minlen < 0); ++ + memcpy(dest, nla_data(src), minlen); + if (count > minlen) + memset(dest + minlen, 0, count - minlen); +diff --git a/lib/vsprintf.c b/lib/vsprintf.c +index 7c488a1ce318..27e16ab859fe 100644 +--- a/lib/vsprintf.c ++++ b/lib/vsprintf.c +@@ -810,7 +810,7 @@ static char *ptr_to_id(char *buf, char *end, const void *ptr, + return pointer_string(buf, end, (const void *)hashval, spec); + } + +-int kptr_restrict __read_mostly; ++int kptr_restrict __read_mostly = 2; + + static noinline_for_stack + char *restricted_pointer(char *buf, char *end, const void *ptr, +diff --git a/mm/Kconfig b/mm/Kconfig +index ab80933be65f..5012bf12aab6 100644 +--- a/mm/Kconfig ++++ b/mm/Kconfig +@@ -304,7 +304,8 @@ config KSM + config DEFAULT_MMAP_MIN_ADDR + int "Low address space to protect from user allocation" + depends on MMU +- default 4096 ++ default 32768 if ARM || (ARM64 && COMPAT) ++ default 65536 + help + This is the portion of low virtual memory which should be protected + from userspace allocation. 
Keeping a user from writing to low pages +diff --git a/mm/mmap.c b/mm/mmap.c +index cb2c79a3e914..336947ee89e9 100644 +--- a/mm/mmap.c ++++ b/mm/mmap.c +@@ -228,6 +228,13 @@ SYSCALL_DEFINE1(brk, unsigned long, brk) + + newbrk = PAGE_ALIGN(brk); + oldbrk = PAGE_ALIGN(mm->brk); ++ /* properly handle unaligned min_brk as an empty heap */ ++ if (min_brk & ~PAGE_MASK) { ++ if (brk == min_brk) ++ newbrk -= PAGE_SIZE; ++ if (mm->brk == min_brk) ++ oldbrk -= PAGE_SIZE; ++ } + if (oldbrk == newbrk) { + mm->brk = brk; + goto success; +diff --git a/mm/page_alloc.c b/mm/page_alloc.c +index 627f1eba6df7..496b41f1e7d8 100644 +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -68,6 +68,7 @@ + #include <linux/lockdep.h> + #include <linux/nmi.h> + #include <linux/psi.h> ++#include <linux/random.h> + + #include <asm/sections.h> + #include <asm/tlbflush.h> +@@ -106,6 +107,15 @@ struct pcpu_drain { + DEFINE_MUTEX(pcpu_drain_mutex); + DEFINE_PER_CPU(struct pcpu_drain, pcpu_drain); + ++bool __meminitdata extra_latent_entropy; ++ ++static int __init setup_extra_latent_entropy(char *str) ++{ ++ extra_latent_entropy = true; ++ return 0; ++} ++early_param("extra_latent_entropy", setup_extra_latent_entropy); ++ + #ifdef CONFIG_GCC_PLUGIN_LATENT_ENTROPY + volatile unsigned long latent_entropy __latent_entropy; + EXPORT_SYMBOL(latent_entropy); +@@ -1427,6 +1437,25 @@ static void __free_pages_ok(struct page *page, unsigned int order) + local_irq_restore(flags); + } + ++static void __init __gather_extra_latent_entropy(struct page *page, ++ unsigned int nr_pages) ++{ ++ if (extra_latent_entropy && !PageHighMem(page) && page_to_pfn(page) < 0x100000) { ++ unsigned long hash = 0; ++ size_t index, end = PAGE_SIZE * nr_pages / sizeof hash; ++ const unsigned long *data = lowmem_page_address(page); ++ ++ for (index = 0; index < end; index++) ++ hash ^= hash + data[index]; ++#ifdef CONFIG_GCC_PLUGIN_LATENT_ENTROPY ++ latent_entropy ^= hash; ++ add_device_randomness((const void *)&latent_entropy, sizeof(latent_entropy)); ++#else ++ add_device_randomness((const void *)&hash, sizeof(hash)); ++#endif ++ } ++} ++ + void __free_pages_core(struct page *page, unsigned int order) + { + unsigned int nr_pages = 1 << order; +@@ -1441,7 +1470,6 @@ void __free_pages_core(struct page *page, unsigned int order) + } + __ClearPageReserved(p); + set_page_count(p, 0); +- + atomic_long_add(nr_pages, &page_zone(page)->managed_pages); + set_page_refcounted(page); + __free_pages(page, order); +@@ -1492,6 +1520,7 @@ void __init memblock_free_pages(struct page *page, unsigned long pfn, + { + if (early_page_uninitialised(pfn)) + return; ++ __gather_extra_latent_entropy(page, 1 << order); + __free_pages_core(page, order); + } + +@@ -1582,6 +1611,7 @@ static void __init deferred_free_range(unsigned long pfn, + if (nr_pages == pageblock_nr_pages && + (pfn & (pageblock_nr_pages - 1)) == 0) { + set_pageblock_migratetype(page, MIGRATE_MOVABLE); ++ __gather_extra_latent_entropy(page, 1 << pageblock_order); + __free_pages_core(page, pageblock_order); + return; + } +@@ -1589,6 +1619,7 @@ static void __init deferred_free_range(unsigned long pfn, + for (i = 0; i < nr_pages; i++, page++, pfn++) { + if ((pfn & (pageblock_nr_pages - 1)) == 0) + set_pageblock_migratetype(page, MIGRATE_MOVABLE); ++ __gather_extra_latent_entropy(page, 1); + __free_pages_core(page, 0); + } + } +@@ -2156,6 +2187,12 @@ static void prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags + { + post_alloc_hook(page, order, gfp_flags); + ++ if (IS_ENABLED(CONFIG_PAGE_SANITIZE_VERIFY) 
&& want_init_on_free()) { ++ int i; ++ for (i = 0; i < (1 << order); i++) ++ verify_zero_highpage(page + i); ++ } ++ + if (!free_pages_prezeroed() && want_init_on_alloc(gfp_flags)) + kernel_init_free_pages(page, 1 << order); + +diff --git a/mm/slab.h b/mm/slab.h +index 7e94700aa78c..975a75b7230c 100644 +--- a/mm/slab.h ++++ b/mm/slab.h +@@ -470,9 +470,13 @@ static inline struct kmem_cache *virt_to_cache(const void *obj) + struct page *page; + + page = virt_to_head_page(obj); ++#ifdef CONFIG_BUG_ON_DATA_CORRUPTION ++ BUG_ON(!PageSlab(page)); ++#else + if (WARN_ONCE(!PageSlab(page), "%s: Object is not a Slab page!\n", + __func__)) + return NULL; ++#endif + return page->slab_cache; + } + +@@ -518,9 +522,14 @@ static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x) + return s; + + cachep = virt_to_cache(x); +- WARN_ONCE(cachep && !slab_equal_or_root(cachep, s), +- "%s: Wrong slab cache. %s but object is from %s\n", +- __func__, s->name, cachep->name); ++ if (cachep && !slab_equal_or_root(cachep, s)) { ++#ifdef CONFIG_BUG_ON_DATA_CORRUPTION ++ BUG(); ++#else ++ WARN_ONCE(1, "%s: Wrong slab cache. %s but object is from %s\n", ++ __func__, s->name, cachep->name); ++#endif ++ } + return cachep; + } + +@@ -545,7 +554,7 @@ static inline size_t slab_ksize(const struct kmem_cache *s) + * back there or track user information then we can + * only use the space before that information. + */ +- if (s->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_STORE_USER)) ++ if ((s->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_STORE_USER)) || IS_ENABLED(CONFIG_SLAB_CANARY)) + return s->inuse; + /* + * Else we can use all the padding etc for the allocation +@@ -674,8 +683,10 @@ static inline void cache_random_seq_destroy(struct kmem_cache *cachep) { } + static inline bool slab_want_init_on_alloc(gfp_t flags, struct kmem_cache *c) + { + if (static_branch_unlikely(&init_on_alloc)) { ++#ifndef CONFIG_SLUB + if (c->ctor) + return false; ++#endif + if (c->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) + return flags & __GFP_ZERO; + return true; +@@ -685,9 +696,15 @@ static inline bool slab_want_init_on_alloc(gfp_t flags, struct kmem_cache *c) + + static inline bool slab_want_init_on_free(struct kmem_cache *c) + { +- if (static_branch_unlikely(&init_on_free)) +- return !(c->ctor || +- (c->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON))); ++ if (static_branch_unlikely(&init_on_free)) { ++#ifndef CONFIG_SLUB ++ if (c->ctor) ++ return false; ++#endif ++ if (c->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) ++ return false; ++ return true; ++ } + return false; + } + +diff --git a/mm/slab_common.c b/mm/slab_common.c +index 0d95ddea13b0..965bba106eab 100644 +--- a/mm/slab_common.c ++++ b/mm/slab_common.c +@@ -28,10 +28,10 @@ + + #include "slab.h" + +-enum slab_state slab_state; ++enum slab_state slab_state __ro_after_init; + LIST_HEAD(slab_caches); + DEFINE_MUTEX(slab_mutex); +-struct kmem_cache *kmem_cache; ++struct kmem_cache *kmem_cache __ro_after_init; + + #ifdef CONFIG_HARDENED_USERCOPY + bool usercopy_fallback __ro_after_init = +@@ -59,7 +59,7 @@ static DECLARE_WORK(slab_caches_to_rcu_destroy_work, + /* + * Merge control. If this is set then no merging of slab caches will occur. 
+ */ +-static bool slab_nomerge = !IS_ENABLED(CONFIG_SLAB_MERGE_DEFAULT); ++static bool slab_nomerge __ro_after_init = !IS_ENABLED(CONFIG_SLAB_MERGE_DEFAULT); + + static int __init setup_slab_nomerge(char *str) + { +diff --git a/mm/slub.c b/mm/slub.c +index 8eafccf75940..bb9ab439ad29 100644 +--- a/mm/slub.c ++++ b/mm/slub.c +@@ -123,6 +123,12 @@ static inline int kmem_cache_debug(struct kmem_cache *s) + #endif + } + ++static inline bool has_sanitize_verify(struct kmem_cache *s) ++{ ++ return IS_ENABLED(CONFIG_SLAB_SANITIZE_VERIFY) && ++ slab_want_init_on_free(s); ++} ++ + void *fixup_red_left(struct kmem_cache *s, void *p) + { + if (kmem_cache_debug(s) && s->flags & SLAB_RED_ZONE) +@@ -307,6 +313,35 @@ static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp) + *(void **)freeptr_addr = freelist_ptr(s, fp, freeptr_addr); + } + ++#ifdef CONFIG_SLAB_CANARY ++static inline unsigned long *get_canary(struct kmem_cache *s, void *object) ++{ ++ if (s->offset) ++ return object + s->offset + sizeof(void *); ++ return object + s->inuse; ++} ++ ++static inline unsigned long get_canary_value(const void *canary, unsigned long value) ++{ ++ return (value ^ (unsigned long)canary) & CANARY_MASK; ++} ++ ++static inline void set_canary(struct kmem_cache *s, void *object, unsigned long value) ++{ ++ unsigned long *canary = get_canary(s, object); ++ *canary = get_canary_value(canary, value); ++} ++ ++static inline void check_canary(struct kmem_cache *s, void *object, unsigned long value) ++{ ++ unsigned long *canary = get_canary(s, object); ++ BUG_ON(*canary != get_canary_value(canary, value)); ++} ++#else ++#define set_canary(s, object, value) ++#define check_canary(s, object, value) ++#endif ++ + /* Loop over all objects in a slab */ + #define for_each_object(__p, __s, __addr, __objects) \ + for (__p = fixup_red_left(__s, __addr); \ +@@ -474,13 +509,13 @@ static inline void *restore_red_left(struct kmem_cache *s, void *p) + * Debug settings: + */ + #if defined(CONFIG_SLUB_DEBUG_ON) +-static slab_flags_t slub_debug = DEBUG_DEFAULT_FLAGS; ++static slab_flags_t slub_debug __ro_after_init = DEBUG_DEFAULT_FLAGS; + #else +-static slab_flags_t slub_debug; ++static slab_flags_t slub_debug __ro_after_init; + #endif + +-static char *slub_debug_slabs; +-static int disable_higher_order_debug; ++static char *slub_debug_slabs __ro_after_init; ++static int disable_higher_order_debug __ro_after_init; + + /* + * slub is about to manipulate internal object metadata. This memory lies +@@ -541,6 +576,9 @@ static struct track *get_track(struct kmem_cache *s, void *object, + else + p = object + s->inuse; + ++ if (IS_ENABLED(CONFIG_SLAB_CANARY)) ++ p = (void *)p + sizeof(void *); ++ + return p + alloc; + } + +@@ -671,6 +709,9 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p) + else + off = s->inuse; + ++ if (IS_ENABLED(CONFIG_SLAB_CANARY)) ++ off += sizeof(void *); ++ + if (s->flags & SLAB_STORE_USER) + off += 2 * sizeof(struct track); + +@@ -802,6 +843,9 @@ static int check_pad_bytes(struct kmem_cache *s, struct page *page, u8 *p) + /* Freepointer is placed after the object. 
*/ + off += sizeof(void *); + ++ if (IS_ENABLED(CONFIG_SLAB_CANARY)) ++ off += sizeof(void *); ++ + if (s->flags & SLAB_STORE_USER) + /* We also have user information there */ + off += 2 * sizeof(struct track); +@@ -1442,6 +1486,8 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s, + object = next; + next = get_freepointer(s, object); + ++ check_canary(s, object, s->random_active); ++ + if (slab_want_init_on_free(s)) { + /* + * Clear the object and the metadata, but don't touch +@@ -1452,8 +1498,12 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s, + : 0; + memset((char *)object + s->inuse, 0, + s->size - s->inuse - rsize); +- ++ if (!IS_ENABLED(CONFIG_SLAB_SANITIZE_VERIFY) && s->ctor) ++ s->ctor(object); + } ++ ++ set_canary(s, object, s->random_inactive); ++ + /* If object's reuse doesn't have to be delayed */ + if (!slab_free_hook(s, object)) { + /* Move object to the new freelist */ +@@ -1461,6 +1511,17 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s, + *head = object; + if (!*tail) + *tail = object; ++ } else if (slab_want_init_on_free(s) && s->ctor) { ++ /* Objects that are put into quarantine by KASAN will ++ * still undergo free_consistency_checks() and thus ++ * need to show a valid freepointer to check_object(). ++ * ++ * Note that doing this for all caches (not just ctor ++ * ones, which have s->offset != NULL)) causes a GPF, ++ * due to KASAN poisoning and the way set_freepointer() ++ * eventually dereferences the freepointer. ++ */ ++ set_freepointer(s, object, NULL); + } + } while (object != old_tail); + +@@ -1474,8 +1535,9 @@ static void *setup_object(struct kmem_cache *s, struct page *page, + void *object) + { + setup_object_debug(s, page, object); ++ set_canary(s, object, s->random_inactive); + object = kasan_init_slab_obj(s, object); +- if (unlikely(s->ctor)) { ++ if (unlikely(s->ctor) && !has_sanitize_verify(s)) { + kasan_unpoison_object_data(s, object); + s->ctor(object); + kasan_poison_object_data(s, object); +@@ -2753,8 +2815,28 @@ static __always_inline void *slab_alloc_node(struct kmem_cache *s, + + maybe_wipe_obj_freeptr(s, object); + +- if (unlikely(slab_want_init_on_alloc(gfpflags, s)) && object) ++ if (has_sanitize_verify(s) && object) { ++ /* KASAN hasn't unpoisoned the object yet (this is done in the ++ * post-alloc hook), so let's do it temporarily. 
++ */ ++ kasan_unpoison_object_data(s, object); ++ BUG_ON(memchr_inv(object, 0, s->object_size)); ++ if (s->ctor) ++ s->ctor(object); ++ kasan_poison_object_data(s, object); ++ } else if (unlikely(slab_want_init_on_alloc(gfpflags, s)) && object) { + memset(object, 0, s->object_size); ++ if (s->ctor) { ++ kasan_unpoison_object_data(s, object); ++ s->ctor(object); ++ kasan_poison_object_data(s, object); ++ } ++ } ++ ++ if (object) { ++ check_canary(s, object, s->random_inactive); ++ set_canary(s, object, s->random_active); ++ } + + slab_post_alloc_hook(s, gfpflags, 1, &object); + +@@ -3137,7 +3219,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, + void **p) + { + struct kmem_cache_cpu *c; +- int i; ++ int i, k; + + /* memcg and kmem_cache debug support */ + s = slab_pre_alloc_hook(s, flags); +@@ -3177,11 +3259,35 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, + local_irq_enable(); + + /* Clear memory outside IRQ disabled fastpath loop */ +- if (unlikely(slab_want_init_on_alloc(flags, s))) { ++ if (has_sanitize_verify(s)) { + int j; + +- for (j = 0; j < i; j++) ++ for (j = 0; j < i; j++) { ++ /* KASAN hasn't unpoisoned the object yet (this is done ++ * in the post-alloc hook), so let's do it temporarily. ++ */ ++ kasan_unpoison_object_data(s, p[j]); ++ BUG_ON(memchr_inv(p[j], 0, s->object_size)); ++ if (s->ctor) ++ s->ctor(p[j]); ++ kasan_poison_object_data(s, p[j]); ++ } ++ } else if (unlikely(slab_want_init_on_alloc(flags, s))) { ++ int j; ++ ++ for (j = 0; j < i; j++) { + memset(p[j], 0, s->object_size); ++ if (s->ctor) { ++ kasan_unpoison_object_data(s, p[j]); ++ s->ctor(p[j]); ++ kasan_poison_object_data(s, p[j]); ++ } ++ } ++ } ++ ++ for (k = 0; k < i; k++) { ++ check_canary(s, p[k], s->random_inactive); ++ set_canary(s, p[k], s->random_active); + } + + /* memcg and kmem_cache debug support */ +@@ -3215,9 +3321,9 @@ EXPORT_SYMBOL(kmem_cache_alloc_bulk); + * and increases the number of allocations possible without having to + * take the list_lock. + */ +-static unsigned int slub_min_order; +-static unsigned int slub_max_order = PAGE_ALLOC_COSTLY_ORDER; +-static unsigned int slub_min_objects; ++static unsigned int slub_min_order __ro_after_init; ++static unsigned int slub_max_order __ro_after_init = PAGE_ALLOC_COSTLY_ORDER; ++static unsigned int slub_min_objects __ro_after_init; + + /* + * Calculate the order of allocation given an slab object size. 
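The set_canary()/check_canary() pairs threaded through the allocation paths above rotate each object's canary between the cache's random_inactive and random_active secrets across alloc and free. A self-contained userspace model of the scheme (the CANARY_MASK value here is an assumption; the kernel derives it per-architecture):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Assumed mask: keep one byte of the canary always zero, mirroring the
 * "guaranteed zero byte" described in the SLAB_CANARY help text. */
#define CANARY_MASK (~(uintptr_t)0xff)

static uintptr_t secret; /* stands in for s->random_active */

/* value = (secret ^ slot address) & mask, as in get_canary_value() */
static uintptr_t canary_for(const uintptr_t *slot)
{
	return (secret ^ (uintptr_t)slot) & CANARY_MASK;
}

int main(void)
{
	size_t size = 32; /* object size; the canary word sits right after it */
	char *obj = malloc(size + sizeof(uintptr_t));
	uintptr_t *slot = (uintptr_t *)(obj + size);

	srand(1);
	secret = (uintptr_t)rand(); /* the kernel uses get_random_long() */

	*slot = canary_for(slot);  /* set_canary() at allocation */
	memset(obj, 'A', size);    /* in-bounds use leaves the canary intact */
	printf("in bounds:  %s\n", *slot == canary_for(slot) ? "ok" : "corrupt");

	obj[size] = 'B';           /* one-byte overflow lands in the canary */
	printf("overflowed: %s\n", *slot == canary_for(slot) ? "ok" : "corrupt");
	/* on "corrupt" the kernel's check_canary() would BUG() */

	free(obj);
	return 0;
}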
+@@ -3385,6 +3491,7 @@ static void early_kmem_cache_node_alloc(int node) + init_object(kmem_cache_node, n, SLUB_RED_ACTIVE); + init_tracking(kmem_cache_node, n); + #endif ++ set_canary(kmem_cache_node, n, kmem_cache_node->random_active); + n = kasan_kmalloc(kmem_cache_node, n, sizeof(struct kmem_cache_node), + GFP_KERNEL); + page->freelist = get_freepointer(kmem_cache_node, n); +@@ -3545,6 +3652,9 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order) + size += sizeof(void *); + } + ++ if (IS_ENABLED(CONFIG_SLAB_CANARY)) ++ size += sizeof(void *); ++ + #ifdef CONFIG_SLUB_DEBUG + if (flags & SLAB_STORE_USER) + /* +@@ -3617,6 +3727,10 @@ static int kmem_cache_open(struct kmem_cache *s, slab_flags_t flags) + #ifdef CONFIG_SLAB_FREELIST_HARDENED + s->random = get_random_long(); + #endif ++#ifdef CONFIG_SLAB_CANARY ++ s->random_active = get_random_long(); ++ s->random_inactive = get_random_long(); ++#endif + + if (!calculate_sizes(s, -1)) + goto error; +@@ -3892,6 +4006,8 @@ void __check_heap_object(const void *ptr, unsigned long n, struct page *page, + offset -= s->red_left_pad; + } + ++ check_canary(s, (void *)ptr - offset, s->random_active); ++ + /* Allow address range falling entirely within usercopy region. */ + if (offset >= s->useroffset && + offset - s->useroffset <= s->usersize && +@@ -3925,7 +4041,11 @@ size_t __ksize(const void *object) + page = virt_to_head_page(object); + + if (unlikely(!PageSlab(page))) { ++#ifdef CONFIG_BUG_ON_DATA_CORRUPTION ++ BUG_ON(!PageCompound(page)); ++#else + WARN_ON(!PageCompound(page)); ++#endif + return page_size(page); + } + +@@ -4765,7 +4885,7 @@ enum slab_stat_type { + #define SO_TOTAL (1 << SL_TOTAL) + + #ifdef CONFIG_MEMCG +-static bool memcg_sysfs_enabled = IS_ENABLED(CONFIG_SLUB_MEMCG_SYSFS_ON); ++static bool memcg_sysfs_enabled __ro_after_init = IS_ENABLED(CONFIG_SLUB_MEMCG_SYSFS_ON); + + static int __init setup_slub_memcg_sysfs(char *str) + { +diff --git a/mm/swap.c b/mm/swap.c +index 5341ae93861f..2f68a8be1397 100644 +--- a/mm/swap.c ++++ b/mm/swap.c +@@ -94,6 +94,13 @@ static void __put_compound_page(struct page *page) + if (!PageHuge(page)) + __page_cache_release(page); + dtor = get_compound_page_dtor(page); ++ if (!PageHuge(page)) ++ BUG_ON(dtor != free_compound_page ++#ifdef CONFIG_TRANSPARENT_HUGEPAGE ++ && dtor != free_transhuge_page ++#endif ++ ); ++ + (*dtor)(page); + } + +diff --git a/mm/util.c b/mm/util.c +index 988d11e6c17c..94536089e0e9 100644 +--- a/mm/util.c ++++ b/mm/util.c +@@ -335,9 +335,9 @@ unsigned long arch_randomize_brk(struct mm_struct *mm) + { + /* Is the current task 32bit ? 
*/ + if (!IS_ENABLED(CONFIG_64BIT) || is_compat_task()) +- return randomize_page(mm->brk, SZ_32M); ++ return mm->brk + get_random_long() % SZ_32M + PAGE_SIZE; + +- return randomize_page(mm->brk, SZ_1G); ++ return mm->brk + get_random_long() % SZ_1G + PAGE_SIZE; + } + + unsigned long arch_mmap_rnd(void) +diff --git a/net/core/dev.c b/net/core/dev.c +index c3da35f3c7e4..a7c47da860d8 100644 +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -4700,7 +4700,7 @@ int netif_rx_ni(struct sk_buff *skb) + } + EXPORT_SYMBOL(netif_rx_ni); + +-static __latent_entropy void net_tx_action(struct softirq_action *h) ++static __latent_entropy void net_tx_action(void) + { + struct softnet_data *sd = this_cpu_ptr(&softnet_data); + +@@ -6563,7 +6563,7 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll) + return work; + } + +-static __latent_entropy void net_rx_action(struct softirq_action *h) ++static __latent_entropy void net_rx_action(void) + { + struct softnet_data *sd = this_cpu_ptr(&softnet_data); + unsigned long time_limit = jiffies + +diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c +index 9f9e00ba3ad7..962c6ca661e4 100644 +--- a/net/core/sysctl_net_core.c ++++ b/net/core/sysctl_net_core.c +@@ -43,6 +43,10 @@ EXPORT_SYMBOL(sysctl_fb_tunnels_only_for_init_net); + int sysctl_devconf_inherit_init_net __read_mostly; + EXPORT_SYMBOL(sysctl_devconf_inherit_init_net); + ++#ifdef CONFIG_HARDENED_STEALTH_NETWORKING ++int sysctl_stealth_blackhole __read_mostly = 1; ++#endif ++ + #ifdef CONFIG_RPS + static int rps_sock_flow_sysctl(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +@@ -512,6 +516,17 @@ static struct ctl_table net_core_table[] = { + .proc_handler = set_default_qdisc + }, + #endif ++#ifdef CONFIG_HARDENED_STEALTH_NETWORKING ++ { ++ .procname = "ip_blackhole", ++ .data = &sysctl_stealth_blackhole, ++ .maxlen = sizeof(int), ++ .mode = 0644, ++ .proc_handler = proc_dointvec_minmax, ++ .extra1 = SYSCTL_ZERO, ++ .extra2 = SYSCTL_ONE, ++ }, ++#endif + #endif /* CONFIG_NET */ + { + .procname = "netdev_budget", +diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig +index fc816b187170..e4a8e6ab83b3 100644 +--- a/net/ipv4/Kconfig ++++ b/net/ipv4/Kconfig +@@ -267,6 +267,7 @@ config IP_PIMSM_V2 + + config SYN_COOKIES + bool "IP: TCP syncookie support" ++ default y + ---help--- + Normal TCP/IP networking is open to an attack known as "SYN + flooding". 
This denial-of-service attack prevents legitimate remote +diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c +index 18068ed42f25..10162b46e62f 100644 +--- a/net/ipv4/icmp.c ++++ b/net/ipv4/icmp.c +@@ -190,6 +190,10 @@ struct icmp_control { + short error; /* This ICMP is classed as an error message */ + }; + ++#ifdef CONFIG_HARDENED_STEALTH_NETWORKING ++extern int sysctl_stealth_blackhole; ++#endif ++ + static const struct icmp_control icmp_pointers[NR_ICMP_TYPES+1]; + + /* +@@ -930,6 +934,11 @@ static bool icmp_echo(struct sk_buff *skb) + { + struct net *net; + ++#ifdef CONFIG_HARDENED_STEALTH_NETWORKING ++ if (likely(sysctl_stealth_blackhole) && !(skb->dev->flags & IFF_LOOPBACK)) ++ return true; ++#endif ++ + net = dev_net(skb_dst(skb)->dev); + if (!net->ipv4.sysctl_icmp_echo_ignore_all) { + struct icmp_bxm icmp_param; +@@ -956,6 +965,12 @@ static bool icmp_echo(struct sk_buff *skb) + static bool icmp_timestamp(struct sk_buff *skb) + { + struct icmp_bxm icmp_param; ++ ++#ifdef CONFIG_HARDENED_STEALTH_NETWORKING ++ if (likely(sysctl_stealth_blackhole) && !(skb->dev->flags & IFF_LOOPBACK)) ++ return true; ++#endif ++ + /* + * Too short. + */ +diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c +index 3b9c7a2725a9..9196c1afe1c6 100644 +--- a/net/ipv4/igmp.c ++++ b/net/ipv4/igmp.c +@@ -132,6 +132,10 @@ + ((in_dev)->mr_v2_seen && \ + time_before(jiffies, (in_dev)->mr_v2_seen))) + ++#ifdef CONFIG_HARDENED_STEALTH_NETWORKING ++extern int sysctl_stealth_blackhole; ++#endif ++ + static int unsolicited_report_interval(struct in_device *in_dev) + { + int interval_ms, interval_jiffies; +@@ -735,6 +739,11 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, + __be32 dst; + int hlen, tlen; + ++#ifdef CONFIG_HARDENED_STEALTH_NETWORKING ++ if (likely(sysctl_stealth_blackhole)) ++ return -1; ++#endif ++ + if (type == IGMPV3_HOST_MEMBERSHIP_REPORT) + return igmpv3_send_report(in_dev, pmc); + +diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c +index 2a976f57f7e7..002da7d3c79c 100644 +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -313,11 +313,13 @@ static void tcp_ecn_rcv_synack(struct tcp_sock *tp, const struct tcphdr *th) + tp->ecn_flags &= ~TCP_ECN_OK; + } + ++#ifndef CONFIG_HARDENED_NO_SIMULT_CONNECT + static void tcp_ecn_rcv_syn(struct tcp_sock *tp, const struct tcphdr *th) + { + if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || !th->cwr)) + tp->ecn_flags &= ~TCP_ECN_OK; + } ++#endif + + static bool tcp_ecn_rcv_ecn_echo(const struct tcp_sock *tp, const struct tcphdr *th) + { +@@ -6030,6 +6032,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, + tcp_paws_reject(&tp->rx_opt, 0)) + goto discard_and_undo; + ++#ifndef CONFIG_HARDENED_NO_SIMULT_CONNECT + if (th->syn) { + /* We see SYN without ACK. It is attempt of + * simultaneous connect with crossed SYNs. +@@ -6081,6 +6084,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, + goto discard; + #endif + } ++#endif + /* "fifth, if neither of the SYN or RST bits is set then + * drop the segment and return." 
+ */ +diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c +index 1c7326e04f9b..678babba399b 100644 +--- a/net/ipv4/tcp_ipv4.c ++++ b/net/ipv4/tcp_ipv4.c +@@ -90,6 +90,10 @@ static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, + struct inet_hashinfo tcp_hashinfo; + EXPORT_SYMBOL(tcp_hashinfo); + ++#ifdef CONFIG_HARDENED_STEALTH_NETWORKING ++extern int sysctl_stealth_blackhole; ++#endif ++ + static u32 tcp_v4_init_seq(const struct sk_buff *skb) + { + return secure_tcp_seq(ip_hdr(skb)->daddr, +@@ -1588,6 +1592,9 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) + return 0; + + reset: ++#ifdef CONFIG_HARDENED_STEALTH_NETWORKING ++ if (!likely(sysctl_stealth_blackhole)) ++#endif + tcp_v4_send_reset(rsk, skb); + discard: + kfree_skb(skb); +@@ -1830,6 +1837,27 @@ int tcp_v4_rcv(struct sk_buff *skb) + if (!pskb_may_pull(skb, th->doff * 4)) + goto discard_it; + ++#ifdef CONFIG_HARDENED_STEALTH_NETWORKING ++ if (likely(sysctl_stealth_blackhole) && ++ ( ++ th->res1 || !tcp_flag_word(th) || ++ tcp_flag_word(th) == TCP_FLAG_PSH || ++ tcp_flag_word(th) & (TCP_FLAG_CWR | TCP_FLAG_ECE) || ++ ( ++ tcp_flag_word(th) & ++ (TCP_FLAG_SYN | TCP_FLAG_FIN | TCP_FLAG_RST) && ++ tcp_flag_word(th) & TCP_FLAG_URG ++ ) || ++ ( ++ tcp_flag_word(th) & ++ (TCP_FLAG_FIN | TCP_FLAG_RST) && ++ tcp_flag_word(th) & TCP_FLAG_SYN ++ ) ++ ) ++ ) ++ goto discard_it; ++#endif ++ + /* An explanation is required here, I think. + * Packet length and doff are validated by header prediction, + * provided case of th->doff==0 is eliminated. +@@ -1843,12 +1871,22 @@ int tcp_v4_rcv(struct sk_buff *skb) + lookup: + sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source, + th->dest, sdif, &refcounted); +- if (!sk) ++ if (!sk) { ++#ifdef CONFIG_HARDENED_STEALTH_NETWORKING ++ ret = 1; ++#endif ++ + goto no_tcp_socket; ++ } + + process: +- if (sk->sk_state == TCP_TIME_WAIT) ++ if (sk->sk_state == TCP_TIME_WAIT) { ++#ifdef CONFIG_HARDENED_STEALTH_NETWORKING ++ ret = 2; ++#endif ++ + goto do_time_wait; ++ } + + if (sk->sk_state == TCP_NEW_SYN_RECV) { + struct request_sock *req = inet_reqsk(sk); +@@ -1968,6 +2006,11 @@ int tcp_v4_rcv(struct sk_buff *skb) + bad_packet: + __TCP_INC_STATS(net, TCP_MIB_INERRS); + } else { ++#ifdef CONFIG_HARDENED_STEALTH_NETWORKING ++ if (!sysctl_stealth_blackhole || (ret == 1 && ++ (skb->dev->flags & IFF_LOOPBACK))) ++#endif ++ + tcp_v4_send_reset(NULL, skb); + } + +diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c +index c802bc80c400..9efacbc3b3e6 100644 +--- a/net/ipv4/tcp_minisocks.c ++++ b/net/ipv4/tcp_minisocks.c +@@ -30,6 +30,10 @@ + #include <net/xfrm.h> + #include <net/busy_poll.h> + ++#ifdef CONFIG_HARDENED_STEALTH_NETWORKING ++extern int sysctl_stealth_blackhole; ++#endif ++ + static bool tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win) + { + if (seq == s_win) +@@ -790,6 +794,10 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, + * avoid becoming vulnerable to outside attack aiming at + * resetting legit local connections. 
+ */ ++#ifdef CONFIG_HARDENED_STEALTH_NETWORKING ++ if (!sysctl_stealth_blackhole || skb->dev->flags & IFF_LOOPBACK) ++#endif ++ + req->rsk_ops->send_reset(sk, skb); + } else if (fastopen) { /* received a valid RST pkt */ + reqsk_fastopen_remove(sk, req, true); +diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c +index be5c5903cfe1..3eb12b8337c0 100644 +--- a/net/ipv4/udp.c ++++ b/net/ipv4/udp.c +@@ -125,6 +125,10 @@ EXPORT_SYMBOL(udp_memory_allocated); + #define MAX_UDP_PORTS 65536 + #define PORTS_PER_CHAIN (MAX_UDP_PORTS / UDP_HTABLE_SIZE_MIN) + ++#ifdef CONFIG_HARDENED_STEALTH_NETWORKING ++extern int sysctl_stealth_blackhole; ++#endif ++ + static int udp_lib_lport_inuse(struct net *net, __u16 num, + const struct udp_hslot *hslot, + unsigned long *bitmap, +@@ -2341,6 +2345,9 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, + goto csum_error; + + __UDP_INC_STATS(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE); ++#ifdef CONFIG_HARDENED_STEALTH_NETWORKING ++ if (!likely(sysctl_stealth_blackhole) || (skb->dev->flags & IFF_LOOPBACK)) ++#endif + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); + + /* +diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c +index ef408a5090a2..2280ac9ab256 100644 +--- a/net/ipv6/icmp.c ++++ b/net/ipv6/icmp.c +@@ -68,6 +68,10 @@ + + #include <linux/uaccess.h> + ++#ifdef CONFIG_HARDENED_STEALTH_NETWORKING ++extern int sysctl_stealth_blackhole; ++#endif ++ + /* + * The ICMP socket(s). This is the most convenient way to flow control + * our ICMP output as well as maintain a clean interface throughout +@@ -879,6 +883,9 @@ static int icmpv6_rcv(struct sk_buff *skb) + + switch (type) { + case ICMPV6_ECHO_REQUEST: ++#ifdef CONFIG_HARDENED_STEALTH_NETWORKING ++ if (!sysctl_stealth_blackhole || skb->dev->flags & IFF_LOOPBACK) ++#endif + if (!net->ipv6.sysctl.icmpv6_echo_ignore_all) + icmpv6_echo_reply(skb); + break; +diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c +index df5fd9109696..153c20e904e0 100644 +--- a/net/ipv6/tcp_ipv6.c ++++ b/net/ipv6/tcp_ipv6.c +@@ -68,6 +68,10 @@ + + #include <trace/events/tcp.h> + ++#ifdef CONFIG_HARDENED_STEALTH_NETWORKING ++extern int sysctl_stealth_blackhole; ++#endif ++ + static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb); + static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, + struct request_sock *req); +@@ -1407,6 +1411,10 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) + return 0; + + reset: ++#ifdef CONFIG_HARDENED_STEALTH_NETWORKING ++ if (!likely(sysctl_stealth_blackhole)) ++#endif ++ + tcp_v6_send_reset(sk, skb); + discard: + if (opt_skb) +@@ -1505,6 +1513,27 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb) + if (!pskb_may_pull(skb, th->doff*4)) + goto discard_it; + ++#ifdef CONFIG_HARDENED_STEALTH_NETWORKING ++ if (likely(sysctl_stealth_blackhole) && ++ ( ++ th->res1 || !tcp_flag_word(th) || ++ tcp_flag_word(th) == TCP_FLAG_PSH || ++ tcp_flag_word(th) & (TCP_FLAG_CWR | TCP_FLAG_ECE) || ++ ( ++ tcp_flag_word(th) & ++ (TCP_FLAG_SYN | TCP_FLAG_FIN | TCP_FLAG_RST) && ++ tcp_flag_word(th) & TCP_FLAG_URG ++ ) || ++ ( ++ tcp_flag_word(th) & ++ (TCP_FLAG_FIN | TCP_FLAG_RST) && ++ tcp_flag_word(th) & TCP_FLAG_SYN ++ ) ++ ) ++ ) ++ goto discard_it; ++#endif ++ + if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo)) + goto csum_error; + +@@ -1515,12 +1544,22 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb) + sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), + th->source, th->dest, inet6_iif(skb), sdif, + 
&refcounted); +- if (!sk) ++ if (!sk) { ++#ifdef CONFIG_HARDENED_STEALTH_NETWORKING ++ ret = 1; ++#endif ++ + goto no_tcp_socket; ++ } + + process: +- if (sk->sk_state == TCP_TIME_WAIT) ++ if (sk->sk_state == TCP_TIME_WAIT) { ++#ifdef CONFIG_HARDENED_STEALTH_NETWORKING ++ ret = 2; ++#endif ++ + goto do_time_wait; ++ } + + if (sk->sk_state == TCP_NEW_SYN_RECV) { + struct request_sock *req = inet_reqsk(sk); +@@ -1633,6 +1672,11 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb) + bad_packet: + __TCP_INC_STATS(net, TCP_MIB_INERRS); + } else { ++#ifdef CONFIG_HARDENED_STEALTH_NETWORKING ++ if (!sysctl_stealth_blackhole || (ret == 1 && ++ (skb->dev->flags & IFF_LOOPBACK))) ++#endif ++ + tcp_v6_send_reset(NULL, skb); + } + +diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c +index 9fec580c968e..aaba8b13ba66 100644 +--- a/net/ipv6/udp.c ++++ b/net/ipv6/udp.c +@@ -54,6 +54,10 @@ + #include <trace/events/skb.h> + #include "udp_impl.h" + ++#ifdef CONFIG_HARDENED_STEALTH_NETWORKING ++extern int sysctl_stealth_blackhole; ++#endif ++ + static u32 udp6_ehashfn(const struct net *net, + const struct in6_addr *laddr, + const u16 lport, +@@ -923,6 +927,9 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, + goto csum_error; + + __UDP6_INC_STATS(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE); ++#ifdef CONFIG_HARDENED_STEALTH_NETWORKING ++ if (!likely(sysctl_stealth_blackhole) || skb->dev->flags & IFF_LOOPBACK) ++#endif + icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); + + kfree_skb(skb); +diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost +index 69897d5d3a70..ed6fdceb1616 100644 +--- a/scripts/Makefile.modpost ++++ b/scripts/Makefile.modpost +@@ -53,6 +53,7 @@ MODPOST = scripts/mod/modpost \ + $(if $(KBUILD_EXTMOD),$(addprefix -e ,$(KBUILD_EXTRA_SYMBOLS))) \ + $(if $(KBUILD_EXTMOD),-o $(modulesymfile)) \ + $(if $(CONFIG_SECTION_MISMATCH_WARN_ONLY),,-E) \ ++ $(if $(CONFIG_DEBUG_WRITABLE_FUNCTION_POINTERS_VERBOSE),-f) \ + $(if $(KBUILD_MODPOST_WARN),-w) + + ifdef MODPOST_VMLINUX +diff --git a/scripts/gcc-plugins/Kconfig b/scripts/gcc-plugins/Kconfig +index e3569543bdac..55cc439b3bc6 100644 +--- a/scripts/gcc-plugins/Kconfig ++++ b/scripts/gcc-plugins/Kconfig +@@ -61,6 +61,11 @@ config GCC_PLUGIN_LATENT_ENTROPY + is some slowdown of the boot process (about 0.5%) and fork and + irq processing. + ++ When extra_latent_entropy is passed on the kernel command line, ++ entropy will be extracted from up to the first 4GB of RAM while the ++ runtime memory allocator is being initialized. This costs even more ++ slowdown of the boot process. ++ + Note that entropy extracted this way is not cryptographically + secure! 
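The modpost change below classifies references from writable data sections into text as a new DATA_TO_TEXT mismatch, counted by default and printed in full with the new -f flag. A hypothetical module fragment showing the pattern it counts, alongside the const variant that avoids it (illustrative only, not from the patch):

#include <linux/module.h>
#include <linux/init.h>

static void handler_impl(void) { }

/* flagged: a function pointer living in writable .data, which a kernel
 * write primitive could redirect to arbitrary code */
static void (*handler)(void) = handler_impl;

/* not flagged: const places the pointer in .rodata; __ro_after_init, used
 * widely elsewhere in this patch, has a similar effect once boot ends */
static void (* const handler_ro)(void) = handler_impl;

static int __init fptr_demo_init(void)
{
	handler();
	handler_ro();
	return 0;
}

static void __exit fptr_demo_exit(void) { }

module_init(fptr_demo_init);
module_exit(fptr_demo_exit);
MODULE_LICENSE("GPL");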
+
+diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
+index 6e892c93d104..f41d496f443e 100644
+--- a/scripts/mod/modpost.c
++++ b/scripts/mod/modpost.c
+@@ -36,6 +36,8 @@ static int warn_unresolved = 0;
+ /* How a symbol is exported */
+ static int sec_mismatch_count = 0;
+ static int sec_mismatch_fatal = 0;
++static int writable_fptr_count = 0;
++static int writable_fptr_verbose = 0;
+ /* ignore missing files */
+ static int ignore_missing_files;
+
+@@ -1018,6 +1020,7 @@ enum mismatch {
+ ANY_EXIT_TO_ANY_INIT,
+ EXPORT_TO_INIT_EXIT,
+ EXTABLE_TO_NON_TEXT,
++ DATA_TO_TEXT
+ };
+
+ /**
+@@ -1144,6 +1147,12 @@ static const struct sectioncheck sectioncheck[] = {
+ .good_tosec = {ALL_TEXT_SECTIONS , NULL},
+ .mismatch = EXTABLE_TO_NON_TEXT,
+ .handler = extable_mismatch_handler,
++},
++/* Do not reference code from writable data */
++{
++ .fromsec = { DATA_SECTIONS, NULL },
++ .bad_tosec = { ALL_TEXT_SECTIONS, NULL },
++ .mismatch = DATA_TO_TEXT
+ }
+ };
+
+@@ -1331,10 +1340,10 @@ static Elf_Sym *find_elf_symbol(struct elf_info *elf, Elf64_Sword addr,
+ continue;
+ if (!is_valid_name(elf, sym))
+ continue;
+- if (sym->st_value == addr)
+- return sym;
+ /* Find a symbol nearby - addr are maybe negative */
+ d = sym->st_value - addr;
++ if (d == 0)
++ return sym;
+ if (d < 0)
+ d = addr - sym->st_value;
+ if (d < distance) {
+@@ -1469,7 +1478,13 @@ static void report_sec_mismatch(const char *modname,
+ char *prl_from;
+ char *prl_to;
+
+- sec_mismatch_count++;
++ if (mismatch->mismatch == DATA_TO_TEXT) {
++ writable_fptr_count++;
++ if (!writable_fptr_verbose)
++ return;
++ } else {
++ sec_mismatch_count++;
++ }
+
+ get_pretty_name(from_is_func, &from, &from_p);
+ get_pretty_name(to_is_func, &to, &to_p);
+@@ -1591,6 +1606,12 @@ static void report_sec_mismatch(const char *modname,
+ fatal("There's a special handler for this mismatch type, "
+ "we should never get here.");
+ break;
++ case DATA_TO_TEXT:
++ fprintf(stderr,
++ "The %s %s:%s references\n"
++ "the %s %s:%s%s\n",
++ from, fromsec, fromsym, to, tosec, tosym, to_p);
++ break;
+ }
+ fprintf(stderr, "\n");
+ }
+@@ -2566,7 +2587,7 @@ int main(int argc, char **argv)
+ struct ext_sym_list *extsym_iter;
+ struct ext_sym_list *extsym_start = NULL;
+
+- while ((opt = getopt(argc, argv, "i:e:mnsT:o:awEd:")) != -1) {
++ while ((opt = getopt(argc, argv, "i:e:fmnsT:o:awEd:")) != -1) {
+ switch (opt) {
+ case 'i':
+ kernel_read = optarg;
+@@ -2580,6 +2601,9 @@ int main(int argc, char **argv)
+ extsym_iter->file = optarg;
+ extsym_start = extsym_iter;
+ break;
++ case 'f':
++ writable_fptr_verbose = 1;
++ break;
+ case 'm':
+ modversions = 1;
+ break;
+@@ -2680,6 +2704,11 @@ int main(int argc, char **argv)
+ }
+
+ free(buf.p);
++ if (writable_fptr_count && !writable_fptr_verbose)
++ warn("modpost: Found %d writable function pointer%s.\n"
++ "To see full details build your kernel with:\n"
++ "'make CONFIG_DEBUG_WRITABLE_FUNCTION_POINTERS_VERBOSE=y'\n",
++ writable_fptr_count, (writable_fptr_count == 1 ? "" : "s"));
+
+ return err;
+ }
+diff --git a/security/Kconfig b/security/Kconfig
+index 2a1a2d396228..66eb3db67eb0 100644
+--- a/security/Kconfig
++++ b/security/Kconfig
+@@ -9,7 +9,7 @@ source "security/keys/Kconfig"
+
+ config SECURITY_DMESG_RESTRICT
+ bool "Restrict unprivileged access to the kernel syslog"
+- default n
++ default y
+ help
+ This enforces restrictions on unprivileged users reading the kernel
+ syslog via dmesg(8).
+@@ -19,10 +19,34 @@ config SECURITY_DMESG_RESTRICT
+
+ If you are unsure how to answer this question, answer N.
+
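With SECURITY_DMESG_RESTRICT flipped to default y above, unprivileged syslog(2) readers get EPERM unless kernel.dmesg_restrict is cleared at runtime. A quick probe via klogctl, the glibc wrapper for syslog(2) (illustrative example only):

#include <sys/klog.h>
#include <errno.h>
#include <stdio.h>
#include <string.h>

#define SYSLOG_ACTION_READ_ALL 3 /* glibc does not define the action macros */

int main(void)
{
	char buf[8192];
	int n = klogctl(SYSLOG_ACTION_READ_ALL, buf, sizeof(buf));

	if (n < 0 && errno == EPERM)
		puts("EPERM: dmesg_restrict=1 (the new default) blocks unprivileged reads");
	else if (n < 0)
		printf("klogctl: %s\n", strerror(errno));
	else
		printf("read %d bytes of kernel log\n", n);
	return 0;
}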
+
++config SECURITY_PERF_EVENTS_RESTRICT
++	bool "Restrict unprivileged use of performance events"
++	depends on PERF_EVENTS
++	default y
++	help
++	  If you say Y here, the kernel.perf_event_paranoid sysctl
++	  will be set to 3 by default, and no unprivileged use of the
++	  perf_event_open syscall will be permitted unless it is
++	  changed.
++
++config SECURITY_TIOCSTI_RESTRICT
++	bool "Restrict unprivileged use of tiocsti command injection"
++	default y
++	help
++	  This enforces restrictions on unprivileged users injecting commands
++	  into other processes which share a tty session using the TIOCSTI
++	  ioctl. This option makes TIOCSTI use require CAP_SYS_ADMIN.
++
++	  If this option is not selected, no restrictions will be enforced
++	  unless the tiocsti_restrict sysctl is explicitly set to (1).
++
++	  If you are unsure how to answer this question, answer N.
++
+ config SECURITY
+ 	bool "Enable different security models"
+ 	depends on SYSFS
+ 	depends on MULTIUSER
++	default y
+ 	help
+ 	  This allows you to choose different security modules to be
+ 	  configured into your kernel.
+@@ -48,6 +72,7 @@ config SECURITYFS
+ config SECURITY_NETWORK
+ 	bool "Socket and Networking Security Hooks"
+ 	depends on SECURITY
++	default y
+ 	help
+ 	  This enables the socket and networking security hooks.
+ 	  If enabled, a security module can use these hooks to
+@@ -154,6 +179,7 @@ config HARDENED_USERCOPY
+ 	bool "Harden memory copies between kernel and userspace"
+ 	depends on HAVE_HARDENED_USERCOPY_ALLOCATOR
+ 	imply STRICT_DEVMEM
++	default y
+ 	help
+ 	  This option checks for obviously wrong memory regions when
+ 	  copying memory to/from the kernel (via copy_to_user() and
+@@ -166,7 +192,6 @@ config HARDENED_USERCOPY
+ config HARDENED_USERCOPY_FALLBACK
+ 	bool "Allow usercopy whitelist violations to fallback to object size"
+ 	depends on HARDENED_USERCOPY
+-	default y
+ 	help
+ 	  This is a temporary option that allows missing usercopy whitelists
+ 	  to be discovered via a WARN() to the kernel log, instead of
+@@ -191,10 +216,21 @@ config HARDENED_USERCOPY_PAGESPAN
+ config FORTIFY_SOURCE
+ 	bool "Harden common str/mem functions against buffer overflows"
+ 	depends on ARCH_HAS_FORTIFY_SOURCE
++	default y
+ 	help
+ 	  Detect overflows of buffers in common string and memory functions
+ 	  where the compiler can determine and validate the buffer sizes.
+
++config FORTIFY_SOURCE_STRICT_STRING
++	bool "Harden common functions against buffer overflows"
++	depends on FORTIFY_SOURCE
++	depends on EXPERT
++	help
++	  Perform stricter overflow checks catching overflows within objects
++	  for common C string functions rather than only between objects.
++
++	  This is not yet intended for production use, only bug finding.
++
+ config STATIC_USERMODEHELPER
+ 	bool "Force all usermode helper calls through a single binary"
+ 	help
+@@ -293,3 +329,128 @@ source "security/Kconfig.hardening"
+
+ endmenu
+
++menu "Hardened Enhancements"
++
++config HARDENED_RANDOM
++	bool "Enhance the random number generator"
++	default n
++	help
++	  Enabling this option enhances the Linux kernel random number generator.
++	  This is done by:
++	   - Increasing the pool size from 4096 bits to 262144 bits (512 B -> 32 KB).
++	   - Increasing the diffusion via the linear feedback shift register.
++	   - Defining newer 64-bit polynomial fields for the input and output pools.
++
++	  Overall, this increases the total entropy available to the system
++	  and strengthens the random number generator.
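For context on what SECURITY_TIOCSTI_RESTRICT blocks, here is a minimal userspace sketch of the classic injection: each byte pushed with TIOCSTI appears in the tty input queue as if typed on the victim's terminal. On a kernel with tiocsti_restrict set to 1, the ioctl fails with EPERM for callers lacking CAP_SYS_ADMIN (in the tty's originating user namespace):

    #include <stdio.h>
    #include <string.h>
    #include <sys/ioctl.h>

    int main(void)
    {
            const char *cmd = "id\n";       /* injected as if typed */

            for (size_t i = 0; i < strlen(cmd); i++) {
                    if (ioctl(0, TIOCSTI, &cmd[i]) < 0) {
                            perror("ioctl(TIOCSTI)"); /* EPERM when restricted */
                            return 1;
                    }
            }
            return 0;
    }

Run against your own tty this is harmless; the attack scenario is a process injecting into a shared session (e.g. after su/sudo to a less-privileged user).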
++
++
++config HARDENED_STEALTH_NETWORKING
++	bool "Enable stealth networking [GRSECURITY]"
++	default n
++	depends on NET
++	help
++	  If you say Y here, neither TCP resets nor ICMP
++	  destination-unreachable packets will be sent in response to packets
++	  sent to ports for which no associated listening process exists.
++	  This feature supports both IPv4 and IPv6 and exempts the
++	  loopback interface from blackholing. Enabling this feature
++	  makes a host more resilient to DoS attacks and reduces network
++	  visibility against scanners.
++
++	  The blackhole feature as-implemented is equivalent to the FreeBSD
++	  blackhole feature, as it prevents RST responses to all packets, not
++	  just SYNs. Under most application behavior this causes no
++	  problems, but applications (like haproxy) may not close certain
++	  connections in a way that cleanly terminates them on the remote
++	  end, leaving the remote host in LAST_ACK state. Because of this
++	  side-effect and to prevent intentional LAST_ACK DoSes, this
++	  feature also adds automatic mitigation against such attacks.
++	  The mitigation drastically reduces the amount of time a socket
++	  can spend in LAST_ACK state. If you're using haproxy and not
++	  all servers it connects to have this option enabled, consider
++	  disabling this feature on the haproxy host.
++
++	  If the sysctl option is enabled, a sysctl entry with the name
++	  "ip_blackhole" will be created.
++	  This sysctl takes the standard zero/non-zero on/off toggle
++	  to enable or disable the feature.
++
++
++config HARDENED_NO_SIMULT_CONNECT
++	bool "Disable simultaneous TCP connections [GRSECURITY]"
++	default n
++	depends on NET
++	help
++	  If you say Y here, a feature by Willy Tarreau will be enabled that
++	  removes a weakness in Linux's strict implementation of TCP that
++	  allows two clients to connect to each other without either entering
++	  a listening state. The weakness allows an attacker to easily prevent
++	  a client from connecting to a known server provided the source port
++	  for the connection is guessed correctly.
++
++	  As the weakness could be used to prevent an antivirus or IPS from
++	  fetching updates, or prevent an SSL gateway from fetching a CRL,
++	  it should be eliminated by enabling this option. Though Linux is
++	  one of the few operating systems supporting simultaneous connect, it
++	  has no legitimate use in practice and is rarely supported by firewalls.
++
++
++config HARDENED_SYSFS_RESTRICT
++	bool "Restrict SysFS & DebugFS [GRSECURITY]"
++	default y
++	depends on SYSFS
++	help
++	  If you say Y here, sysfs (the pseudo-filesystem mounted at /sys) and
++	  any filesystem normally mounted under it (e.g. debugfs) will be
++	  mostly accessible only by root. These filesystems generally provide access
++	  to hardware and debug information that isn't appropriate for unprivileged
++	  users of the system. Sysfs and debugfs have also become a large source
++	  of new vulnerabilities, ranging from infoleaks to local compromise.
++	  There has been very little oversight with an eye toward security involved
++	  in adding new exporters of information to these filesystems, so their
++	  use is discouraged.
++	  To enable or disable this feature at runtime, use the sysctl
++	  kernel.sysfs_restricted.
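The help text above names the sysctl only as "ip_blackhole"; its exact /proc/sys location is not shown in this excerpt, so the path in the sketch below is an assumption to be checked against the patched tree. A minimal way to flip the toggle at runtime from C:

    #include <stdio.h>

    int main(void)
    {
            /* Path assumed, not confirmed by this patch excerpt. */
            FILE *f = fopen("/proc/sys/net/ipv4/ip_blackhole", "w");

            if (!f) {
                    perror("fopen");
                    return 1;
            }
            fputs("1\n", f);        /* non-zero enables blackholing */
            return fclose(f) ? 1 : 0;
    }

The same effect can of course be had with sysctl(8) or a plain shell redirect; the point is only that the knob follows the standard zero/non-zero convention.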
++	  For reasons of compatibility, a few directories have been whitelisted
++	  for access by non-root users:
++	  /sys/fs/selinux
++	  /sys/fs/fuse
++	  /sys/devices/system/cpu
++
++
++config HARDENED_FIFO
++	bool "Restrict FIFO [GRSECURITY]"
++	default y
++	help
++	  If you say Y here, users will not be able to write to FIFOs they don't
++	  own in world-writable sticky (+t) directories (e.g. /tmp), unless the
++	  owner of the FIFO is the same as the owner of the directory it resides
++	  in. If the sysctl option is enabled, a sysctl option with the name
++	  "fifo_restrictions" is created.
++
++
++config HARDENED_MODULE_LOAD
++	bool "Harden module auto-loading [GRSECURITY]"
++	default y
++	depends on MODULES
++	help
++	  If you say Y here, module auto-loading in response to use of some
++	  feature implemented by an unloaded module will be restricted to
++	  root users. Enabling this option helps defend against attacks
++	  by unprivileged users who abuse the auto-loading behavior to
++	  cause a vulnerable module to load that is then exploited.
++
++	  If this option prevents a legitimate use of auto-loading for a
++	  non-root user, the administrator can execute modprobe manually
++	  with the exact name of the module mentioned in the alert log.
++	  Alternatively, the administrator can add the module to the list
++	  of modules loaded at boot by modifying init scripts.
++
++	  Modification of init scripts will most likely be needed on
++	  Ubuntu servers with encrypted home directory support enabled,
++	  as the first non-root user logging in will cause the ecb(aes),
++	  ecb(aes)-all, cbc(aes), and cbc(aes)-all modules to be loaded.
++
++
++endmenu
+diff --git a/security/Kconfig.hardening b/security/Kconfig.hardening
+index af4c979b38ee..473e40bb8537 100644
+--- a/security/Kconfig.hardening
++++ b/security/Kconfig.hardening
+@@ -169,6 +169,7 @@ config STACKLEAK_RUNTIME_DISABLE
+
+ config INIT_ON_ALLOC_DEFAULT_ON
+ 	bool "Enable heap memory zeroing on allocation by default"
++	default y
+ 	help
+ 	  This has the effect of setting "init_on_alloc=1" on the kernel
+ 	  command line. This can be disabled with "init_on_alloc=0".
+@@ -181,6 +182,7 @@ config INIT_ON_ALLOC_DEFAULT_ON
+
+ config INIT_ON_FREE_DEFAULT_ON
+ 	bool "Enable heap memory zeroing on free by default"
++	default y
+ 	help
+ 	  This has the effect of setting "init_on_free=1" on the kernel
+ 	  command line. This can be disabled with "init_on_free=0".
+@@ -196,6 +198,20 @@ config INIT_ON_FREE_DEFAULT_ON
+ 	  touching "cold" memory areas. Most cases see 3-5% impact. Some
+ 	  synthetic workloads have measured as high as 8%.
+
++config PAGE_SANITIZE_VERIFY
++	bool "Verify sanitized pages"
++	default y
++	help
++	  When init_on_free is enabled, verify that newly allocated pages
++	  are zeroed to detect write-after-free bugs.
++
++config SLAB_SANITIZE_VERIFY
++	bool "Verify sanitized SLAB allocations"
++	default y
++	help
++	  When init_on_free is enabled, verify that newly allocated slab
++	  objects are zeroed to detect write-after-free bugs.
++
+ endmenu
+
+ endmenu
+diff --git a/security/selinux/Kconfig b/security/selinux/Kconfig
+index 5711689deb6a..fab0cb896907 100644
+--- a/security/selinux/Kconfig
++++ b/security/selinux/Kconfig
+@@ -3,7 +3,7 @@ config SECURITY_SELINUX
+ 	bool "NSA SELinux Support"
+ 	depends on SECURITY_NETWORK && AUDIT && NET && INET
+ 	select NETWORK_SECMARK
+-	default n
++	default y
+ 	help
+ 	  This selects NSA Security-Enhanced Linux (SELinux).
+ 	  You will also need a policy configuration and a labeled filesystem.
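To see what the HARDENED_FIFO option changes in practice, consider opening a FIFO that another user created in sticky, world-writable /tmp. With fifo_restrictions enabled, the open is denied unless the FIFO's owner matches the directory's owner (or the caller owns the FIFO). The path below is purely illustrative:

    #include <stdio.h>
    #include <fcntl.h>
    #include <unistd.h>

    int main(void)
    {
            /* Hypothetical FIFO owned by another user under /tmp. */
            int fd = open("/tmp/other-users-fifo", O_WRONLY | O_NONBLOCK);

            if (fd < 0) {
                    perror("open"); /* EACCES once the restriction applies */
                    return 1;
            }
            if (write(fd, "hello\n", 6) < 0)
                    perror("write");
            return close(fd);
    }

This closes the classic /tmp FIFO-squatting trick, where an attacker pre-creates a FIFO at a path a privileged program is expected to write to.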
+@@ -65,23 +65,3 @@ config SECURITY_SELINUX_AVC_STATS
+ 	  This option collects access vector cache statistics to
+ 	  /selinux/avc/cache_stats, which may be monitored via
+ 	  tools such as avcstat.
+-
+-config SECURITY_SELINUX_CHECKREQPROT_VALUE
+-	int "NSA SELinux checkreqprot default value"
+-	depends on SECURITY_SELINUX
+-	range 0 1
+-	default 0
+-	help
+-	  This option sets the default value for the 'checkreqprot' flag
+-	  that determines whether SELinux checks the protection requested
+-	  by the application or the protection that will be applied by the
+-	  kernel (including any implied execute for read-implies-exec) for
+-	  mmap and mprotect calls. If this option is set to 0 (zero),
+-	  SELinux will default to checking the protection that will be applied
+-	  by the kernel. If this option is set to 1 (one), SELinux will
+-	  default to checking the protection requested by the application.
+-	  The checkreqprot flag may be changed from the default via the
+-	  'checkreqprot=' boot parameter. It may also be changed at runtime
+-	  via /selinux/checkreqprot if authorized by policy.
+-
+-	  If you are unsure how to answer this question, answer 0.
+diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
+index db44c7eb4321..045a6940f105 100644
+--- a/security/selinux/hooks.c
++++ b/security/selinux/hooks.c
+@@ -135,18 +135,7 @@ static int __init selinux_enabled_setup(char *str)
+ __setup("selinux=", selinux_enabled_setup);
+ #endif
+
+-static unsigned int selinux_checkreqprot_boot =
+-	CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE;
+-
+-static int __init checkreqprot_setup(char *str)
+-{
+-	unsigned long checkreqprot;
+-
+-	if (!kstrtoul(str, 0, &checkreqprot))
+-		selinux_checkreqprot_boot = checkreqprot ? 1 : 0;
+-	return 1;
+-}
+-__setup("checkreqprot=", checkreqprot_setup);
++static const unsigned int selinux_checkreqprot_boot;
+
+ /**
+  * selinux_secmark_enabled - Check to see if SECMARK is currently enabled
+diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c
+index ee94fa469c29..c2df7ca14883 100644
+--- a/security/selinux/selinuxfs.c
++++ b/security/selinux/selinuxfs.c
+@@ -635,7 +635,6 @@ static ssize_t sel_read_checkreqprot(struct file *filp, char __user *buf,
+ static ssize_t sel_write_checkreqprot(struct file *file, const char __user *buf,
+ 				      size_t count, loff_t *ppos)
+ {
+-	struct selinux_fs_info *fsi = file_inode(file)->i_sb->s_fs_info;
+ 	char *page;
+ 	ssize_t length;
+ 	unsigned int new_value;
+@@ -659,10 +658,9 @@ static ssize_t sel_write_checkreqprot(struct file *file, const char __user *buf,
+ 		return PTR_ERR(page);
+
+ 	length = -EINVAL;
+-	if (sscanf(page, "%u", &new_value) != 1)
++	if (sscanf(page, "%u", &new_value) != 1 || new_value)
+ 		goto out;
+
+-	fsi->state->checkreqprot = new_value ? 1 : 0;
+ 	length = count;
+ out:
+ 	kfree(page);
+diff --git a/security/yama/Kconfig b/security/yama/Kconfig
+index a810304123ca..b809050b25d2 100644
+--- a/security/yama/Kconfig
++++ b/security/yama/Kconfig
+@@ -2,7 +2,7 @@
+ config SECURITY_YAMA
+ 	bool "Yama support"
+ 	depends on SECURITY
+-	default n
++	default y
+ 	help
+ 	  This selects Yama, which extends DAC support with additional
+ 	  system-wide security settings beyond regular Linux discretionary
+diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
+index 75b7ee1af1c3..2bf68e7b2ee9 100644
+--- a/virt/kvm/kvm_main.c
++++ b/virt/kvm/kvm_main.c
+@@ -632,6 +632,10 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd)
+ 	struct kvm_stat_data *stat_data;
+ 	struct kvm_stats_debugfs_item *p;
+
++#ifdef CONFIG_HARDENED_SYSFS_RESTRICT
++	return 0;
++#endif
++
+ 	if (!debugfs_initialized())
+ 		return 0;
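Returning to the sel_write_checkreqprot() hunk above: with the added `|| new_value` test, checkreqprot is pinned at 0 and any attempt to re-enable it from userspace now fails with EINVAL. A quick userspace demonstration, assuming selinuxfs is mounted at the usual /sys/fs/selinux:

    #include <stdio.h>
    #include <fcntl.h>
    #include <unistd.h>

    int main(void)
    {
            int fd = open("/sys/fs/selinux/checkreqprot", O_WRONLY);

            if (fd < 0) {
                    perror("open");
                    return 1;
            }
            /* Writing "0" still succeeds; any non-zero value is rejected
             * with EINVAL on a kernel carrying this patch.
             */
            if (write(fd, "1", 1) < 0)
                    perror("write");
            return close(fd);
    }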
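On the final kvm_create_vm_debugfs() hunk: the #ifdef compiles the early return in only when CONFIG_HARDENED_SYSFS_RESTRICT is set, so per-VM debugfs entries are never created under the restriction. An equivalent sketch using the kernel's IS_ENABLED() macro, which keeps the remainder of the function visible to the compiler in both configurations (illustrative only, not part of the patch):

    #include <linux/kconfig.h>

    static int kvm_create_vm_debugfs_sketch(void)
    {
            /* Same effect as the #ifdef above: skip creating debugfs
             * entries when sysfs/debugfs access is restricted to root.
             */
            if (IS_ENABLED(CONFIG_HARDENED_SYSFS_RESTRICT))
                    return 0;

            /* ... normal debugfs setup would continue here ... */
            return 0;
    }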