Message-ID: <6380ba8d-4e99-46e6-8d92-911d10963ba7@suse.com>
Date: Mon, 26 Feb 2024 09:17:30 +0200
From: Nikolay Borisov <nik.borisov@...e.com>
To: linux-kernel@...r.kernel.org, linux-tip-commits@...r.kernel.org
Cc: Alyssa Milburn <alyssa.milburn@...el.com>,
 Andrew Cooper <andrew.cooper3@...rix.com>,
 Peter Zijlstra <peterz@...radead.org>,
 Pawan Gupta <pawan.kumar.gupta@...ux.intel.com>,
 Dave Hansen <dave.hansen@...ux.intel.com>, x86@...nel.org
Subject: Re: [tip: x86/urgent] x86/bugs: Add asm helpers for executing VERW



On 20.02.24 at 3:02, tip-bot2 for Pawan Gupta wrote:
> The following commit has been merged into the x86/urgent branch of tip:
> 
> Commit-ID:     baf8361e54550a48a7087b603313ad013cc13386
> Gitweb:        https://git.kernel.org/tip/baf8361e54550a48a7087b603313ad013cc13386
> Author:        Pawan Gupta <pawan.kumar.gupta@...ux.intel.com>
> AuthorDate:    Tue, 13 Feb 2024 18:21:35 -08:00
> Committer:     Dave Hansen <dave.hansen@...ux.intel.com>
> CommitterDate: Mon, 19 Feb 2024 16:31:33 -08:00
> 
> x86/bugs: Add asm helpers for executing VERW
> 
> MDS mitigation requires clearing the CPU buffers before returning to
> userspace. This needs to be done late in the exit-to-user path. The
> current location of VERW leaves a window for kernel data to end up in
> CPU buffers via memory accesses done after VERW, such as:
> 
>    1. Kernel data accessed by an NMI between VERW and return-to-user can
>       remain in CPU buffers, since an NMI returning to the kernel does
>       not execute VERW to clear the CPU buffers.
>    2. Alyssa reported that after VERW is executed,
>       CONFIG_GCC_PLUGIN_STACKLEAK=y scrubs the stack used by a system
>       call. Memory accesses during stack scrubbing can move kernel stack
>       contents into CPU buffers.
>    3. When caller-saved registers are restored after a return from the
>       function executing VERW, the kernel stack accesses can remain in
>       CPU buffers (since they occur after VERW).
> 
> To fix this, VERW needs to be moved very late in the exit-to-user path.
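
(For context, the follow-up patches in this series then place the new
macro as the last thing before the return-to-user instruction; roughly,
and assuming the 64-bit syscall exit as the call site:

	swapgs
	CLEAR_CPU_BUFFERS
	sysretq

so that no kernel memory access can occur after the VERW.)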
> 
> In preparation for moving VERW to entry/exit asm code, create macros
> that can be used in asm. Also make VERW patching depend on a new feature
> flag X86_FEATURE_CLEAR_CPU_BUF.
> 
> Reported-by: Alyssa Milburn <alyssa.milburn@...el.com>
> Suggested-by: Andrew Cooper <andrew.cooper3@...rix.com>
> Suggested-by: Peter Zijlstra <peterz@...radead.org>
> Signed-off-by: Pawan Gupta <pawan.kumar.gupta@...ux.intel.com>
> Signed-off-by: Dave Hansen <dave.hansen@...ux.intel.com>
> Link: https://lore.kernel.org/all/20240213-delay-verw-v8-1-a6216d83edb7%40linux.intel.com
> ---
>   arch/x86/entry/entry.S               | 23 +++++++++++++++++++++++
>   arch/x86/include/asm/cpufeatures.h   |  2 +-
>   arch/x86/include/asm/nospec-branch.h | 13 +++++++++++++
>   3 files changed, 37 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S
> index 8c8d38f..0033790 100644
> --- a/arch/x86/entry/entry.S
> +++ b/arch/x86/entry/entry.S
> @@ -6,6 +6,9 @@
>   #include <linux/export.h>
>   #include <linux/linkage.h>
>   #include <asm/msr-index.h>
> +#include <asm/unwind_hints.h>
> +#include <asm/segment.h>
> +#include <asm/cache.h>
>   
>   .pushsection .noinstr.text, "ax"
>   
> @@ -20,3 +23,23 @@ SYM_FUNC_END(entry_ibpb)
>   EXPORT_SYMBOL_GPL(entry_ibpb);
>   
>   .popsection
> +
> +/*
> + * Define the VERW operand that is disguised as entry code so that
> + * it can be referenced with KPTI enabled. This ensures VERW can be
> + * used late in the exit-to-user path after page tables are switched.
> + */
> +.pushsection .entry.text, "ax"
> +
> +.align L1_CACHE_BYTES, 0xcc
> +SYM_CODE_START_NOALIGN(mds_verw_sel)
> +	UNWIND_HINT_UNDEFINED
> +	ANNOTATE_NOENDBR
> +	.word __KERNEL_DS
> +.align L1_CACHE_BYTES, 0xcc
> +SYM_CODE_END(mds_verw_sel);
> +/* For KVM */
> +EXPORT_SYMBOL_GPL(mds_verw_sel);
> +
> +.popsection
> +
> diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
> index fdf723b..2b62cdd 100644
> --- a/arch/x86/include/asm/cpufeatures.h
> +++ b/arch/x86/include/asm/cpufeatures.h
> @@ -95,7 +95,7 @@
>   #define X86_FEATURE_SYSENTER32		( 3*32+15) /* "" sysenter in IA32 userspace */
>   #define X86_FEATURE_REP_GOOD		( 3*32+16) /* REP microcode works well */
>   #define X86_FEATURE_AMD_LBR_V2		( 3*32+17) /* AMD Last Branch Record Extension Version 2 */
> -/* FREE, was #define X86_FEATURE_LFENCE_RDTSC		( 3*32+18) "" LFENCE synchronizes RDTSC */
> +#define X86_FEATURE_CLEAR_CPU_BUF	( 3*32+18) /* "" Clear CPU buffers using VERW */
>   #define X86_FEATURE_ACC_POWER		( 3*32+19) /* AMD Accumulated Power Mechanism */
>   #define X86_FEATURE_NOPL		( 3*32+20) /* The NOPL (0F 1F) instructions */
>   #define X86_FEATURE_ALWAYS		( 3*32+21) /* "" Always-present feature */
> diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
> index 262e655..077083e 100644
> --- a/arch/x86/include/asm/nospec-branch.h
> +++ b/arch/x86/include/asm/nospec-branch.h
> @@ -315,6 +315,17 @@
>   #endif
>   .endm
>   
> +/*
> + * Macro to execute the VERW instruction that mitigates transient data
> + * sampling attacks such as MDS. On affected systems a microcode update
> + * overloaded the VERW instruction to also clear the CPU buffers. VERW
> + * clobbers CFLAGS.ZF.
> + *
> + * Note: Only the memory operand variant of VERW clears the CPU buffers.
> + */
> +.macro CLEAR_CPU_BUFFERS
> +	ALTERNATIVE "", __stringify(verw _ASM_RIP(mds_verw_sel)), X86_FEATURE_CLEAR_CPU_BUF

Any particular reason why this uses a RIP-relative rather than an
absolute address mode? I know in our private exchange you said there is
no significance, but older kernels, for example, lack relocation support
in alternatives. That can of course be worked around by slightly
changing the logic of the macro, which means different kernels will end
up with slightly different macros. Relocation support landed in commit
270a69c4485d7d07516d058bcc0473c90ee22185 (6.5).
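
For pre-6.5 kernels, one way around it would be to keep the VERW itself
outside the patched region and let the alternative patch only a jump
over it, so that nothing inside the alternative needs a relocation.
Untested sketch, assuming the rest of the patch is taken as-is:

.macro CLEAR_CPU_BUFFERS
	/* Replaced with NOPs when X86_FEATURE_CLEAR_CPU_BUF is set */
	ALTERNATIVE "jmp .Lskip_verw_\@", "", X86_FEATURE_CLEAR_CPU_BUF
	/* Never copied by the alternatives code, so the RIP-relative
	 * operand needs no relocation */
	verw _ASM_RIP(mds_verw_sel)
.Lskip_verw_\@:
.endm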

> +.endm
> +
>   #else /* __ASSEMBLY__ */
>   
>   #define ANNOTATE_RETPOLINE_SAFE					\
> @@ -536,6 +547,8 @@ DECLARE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush);
>   
>   DECLARE_STATIC_KEY_FALSE(mmio_stale_data_clear);
>   
> +extern u16 mds_verw_sel;
> +
>   #include <asm/segment.h>
>   
>   /**
> 
