lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <87r1x8v0eb.fsf@nanos.tec.linutronix.de>
Date:   Tue, 31 Mar 2020 20:34:36 +0200
From:   Thomas Gleixner <tglx@...utronix.de>
To:     Balbir Singh <sblbir@...zon.com>, linux-kernel@...r.kernel.org
Cc:     tony.luck@...el.com, keescook@...omium.org, x86@...nel.org,
        benh@...nel.crashing.org, dave.hansen@...el.com,
        Balbir Singh <sblbir@...zon.com>
Subject: Re: [RFC PATCH v2 3/4] arch/x86: Optionally flush L1D on context switch

Balbir Singh <sblbir@...zon.com> writes:

> This patch implements a mechanisn to selectively flush the L1D cache.

git grep 'This patch' Documentation/process/

> diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
> index 6f66d841262d..1d535059b358 100644
> --- a/arch/x86/include/asm/tlbflush.h
> +++ b/arch/x86/include/asm/tlbflush.h
> @@ -219,6 +219,12 @@ struct tlb_state {
>  	 */
>  	unsigned long cr4;
>  
> +	/*
> +	 * Flush the L1D cache on switch_mm_irqs_off() for a
> +	 * task getting off the CPU, if it opted in to do so
> +	 */
> +	bool last_user_mm_l1d_flush;

...

> +/*
> + * Flush the L1D cache for this CPU. We want to this at switch mm time,
> + * this is a pessimistic security measure and an opt-in for those tasks
> + * that host sensitive information and there are concerns about spills
> + * from fill buffers.

Color me confused, but how is L1D flush mitigating fill buffer spills
(MFBDS)? The MDS family is mitigated by MD_CLEAR, i.e VERW.

> + */
> +static void l1d_flush(struct mm_struct *next, struct task_struct *tsk)
> +{
> +	struct mm_struct *real_prev = this_cpu_read(cpu_tlbstate.loaded_mm);
> +
> +	/*
> +	 * If we are not really switching mm's, we can just return
> +	 */
> +	if (real_prev == next)
> +		return;

Instead of having the same check here, please stick the call into the
corresponding path in switch_mm_irqs_off(), i.e. where we already have
the cond_ibpb() invocation.

> +	/*
> +	 * Do we need flushing for by the previous task

  for by? Perhaps:

  Did the previous task request L1D flush when it scheduled in?

> +	 */
> +	if (this_cpu_read(cpu_tlbstate.last_user_mm_l1d_flush) != 0) {

This is a bool, so != 0 is pointless.

> +		if (!flush_l1d_cache_hw())
> +			flush_l1d_cache_sw(l1d_flush_pages);
> +		this_cpu_write(cpu_tlbstate.last_user_mm_l1d_flush, 0);

s/0/false/

> +		/* Make sure we clear the values before we set it again */
> +		barrier();
> +	}
> +
> +	if (tsk == NULL)
> +		return;
> +
> +	/* We don't need stringent checks as we opt-in/opt-out */
> +	if (test_ti_thread_flag(&tsk->thread_info, TIF_L1D_FLUSH))
> +		this_cpu_write(cpu_tlbstate.last_user_mm_l1d_flush, 1);

s/1/true/

That aside looking at the gazillion of conditionals here. That's 4 in
the worst case. So how about extending cond_ibpb() like the below?

Thanks,

        tglx

8<---------------------

--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -84,7 +84,7 @@ struct thread_info {
 #define TIF_SYSCALL_AUDIT	7	/* syscall auditing active */
 #define TIF_SECCOMP		8	/* secure computing */
 #define TIF_SPEC_IB		9	/* Indirect branch speculation mitigation */
-#define TIF_SPEC_FORCE_UPDATE	10	/* Force speculation MSR update in context switch */
+#define TIF_SPEC_FLUSH_L1D	10	/* L1D Flush in switch_mm() */
 #define TIF_USER_RETURN_NOTIFY	11	/* notify kernel of userspace return */
 #define TIF_UPROBE		12	/* breakpointed or singlestepping */
 #define TIF_PATCH_PENDING	13	/* pending live patching update */
@@ -96,6 +96,7 @@ struct thread_info {
 #define TIF_MEMDIE		20	/* is terminating due to OOM killer */
 #define TIF_POLLING_NRFLAG	21	/* idle is polling for TIF_NEED_RESCHED */
 #define TIF_IO_BITMAP		22	/* uses I/O bitmap */
+#define TIF_SPEC_FORCE_UPDATE	23	/* Force speculation MSR update in context switch */
 #define TIF_FORCED_TF		24	/* true if TF in eflags artificially */
 #define TIF_BLOCKSTEP		25	/* set when we want DEBUGCTLMSR_BTF */
 #define TIF_LAZY_MMU_UPDATES	27	/* task is updating the mmu lazily */
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -172,7 +172,7 @@ struct tlb_state {
 	/* Last user mm for optimizing IBPB */
 	union {
 		struct mm_struct	*last_user_mm;
-		unsigned long		last_user_mm_ibpb;
+		unsigned long		last_user_mm_spec;
 	};
 
 	u16 loaded_mm_asid;
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -33,10 +33,13 @@
  */
 
 /*
- * Use bit 0 to mangle the TIF_SPEC_IB state into the mm pointer which is
- * stored in cpu_tlb_state.last_user_mm_ibpb.
+ * Bits mangle the TIF_SPEC_* state into the mm pointer which is
+ * stored in cpu_tlb_state.last_user_mm_spec.
  */
 #define LAST_USER_MM_IBPB	0x1UL
+#define LAST_USER_MM_FLUSH_L1D	0x2UL
+
+#define LAST_USER_MM_SPEC_MASK	(LAST_USER_MM_IBPB | LAST_USER_MM_FLUSH_L1D)
 
 /*
  * We get here when we do something requiring a TLB invalidation
@@ -189,18 +192,22 @@ static void sync_current_stack_to_mm(str
 	}
 }
 
-static inline unsigned long mm_mangle_tif_spec_ib(struct task_struct *next)
+static inline unsigned long mm_mangle_tif_spec(struct task_struct *next)
 {
 	unsigned long next_tif = task_thread_info(next)->flags;
-	unsigned long ibpb = (next_tif >> TIF_SPEC_IB) & LAST_USER_MM_IBPB;
+	unsigned long bits = (next_tif >> TIF_SPEC_IB) & LAST_USER_MM_SPEC_MASK;
+
+	BUILD_BUG_ON(TIF_SPEC_FLUSH_L1D != TIF_SPEC_IB + 1);
 
-	return (unsigned long)next->mm | ibpb;
+	return (unsigned long)next->mm | bits;
 }
 
-static void cond_ibpb(struct task_struct *next)
+static void cond_mitigations(struct task_struct *next)
 {
-	if (!next || !next->mm)
-		return;
+	unsigned long prev_mm, next_mm;
+
+	prev_mm = this_cpu_read(cpu_tlbstate.last_user_mm_spec);
+	next_mm = mm_mangle_tif_spec(next);
 
 	/*
 	 * Both, the conditional and the always IBPB mode use the mm
@@ -212,8 +219,6 @@ static void cond_ibpb(struct task_struct
 	 * exposed data is not really interesting.
 	 */
 	if (static_branch_likely(&switch_mm_cond_ibpb)) {
-		unsigned long prev_mm, next_mm;
-
 		/*
 		 * This is a bit more complex than the always mode because
 		 * it has to handle two cases:
@@ -243,20 +248,14 @@ static void cond_ibpb(struct task_struct
 		 * Optimize this with reasonably small overhead for the
 		 * above cases. Mangle the TIF_SPEC_IB bit into the mm
 		 * pointer of the incoming task which is stored in
-		 * cpu_tlbstate.last_user_mm_ibpb for comparison.
-		 */
-		next_mm = mm_mangle_tif_spec_ib(next);
-		prev_mm = this_cpu_read(cpu_tlbstate.last_user_mm_ibpb);
-
-		/*
+		 * cpu_tlbstate.last_user_mm_spec for comparison.
+		 *
 		 * Issue IBPB only if the mm's are different and one or
 		 * both have the IBPB bit set.
 		 */
 		if (next_mm != prev_mm &&
 		    (next_mm | prev_mm) & LAST_USER_MM_IBPB)
 			indirect_branch_prediction_barrier();
-
-		this_cpu_write(cpu_tlbstate.last_user_mm_ibpb, next_mm);
 	}
 
 	if (static_branch_unlikely(&switch_mm_always_ibpb)) {
@@ -265,11 +264,15 @@ static void cond_ibpb(struct task_struct
 		 * different context than the user space task which ran
 		 * last on this CPU.
 		 */
-		if (this_cpu_read(cpu_tlbstate.last_user_mm) != next->mm) {
+		if ((prev_mm & ~LAST_USER_MM_SPEC_MASK) !=
+		    (unsigned long)next->mm)
 			indirect_branch_prediction_barrier();
-			this_cpu_write(cpu_tlbstate.last_user_mm, next->mm);
-		}
 	}
+
+	if (prev_mm & LAST_USER_MM_FLUSH_L1D)
+		flush_l1d();
+
+	this_cpu_write(cpu_tlbstate.last_user_mm_spec, next_mm);
 }
 
 void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
@@ -371,11 +374,10 @@ void switch_mm_irqs_off(struct mm_struct
 		need_flush = true;
 	} else {
 		/*
-		 * Avoid user/user BTB poisoning by flushing the branch
-		 * predictor when switching between processes. This stops
-		 * one process from doing Spectre-v2 attacks on another.
+		 * Speculation vulnerability mitigations when switching
+		 * to a different user space process.
 		 */
-		cond_ibpb(tsk);
+		cond_mitigations(tsk);
 
 		if (IS_ENABLED(CONFIG_VMAP_STACK)) {
 			/*
@@ -501,7 +503,7 @@ void initialize_tlbstate_and_flush(void)
 	write_cr3(build_cr3(mm->pgd, 0));
 
 	/* Reinitialize tlbstate. */
-	this_cpu_write(cpu_tlbstate.last_user_mm_ibpb, LAST_USER_MM_IBPB);
+	this_cpu_write(cpu_tlbstate.last_user_mm_spec, LAST_USER_MM_SPEC_MASK);
 	this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0);
 	this_cpu_write(cpu_tlbstate.next_asid, 1);
 	this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id);

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ