Message-ID: <4962D4CF.7070200@sgi.com>
Date:	Mon, 05 Jan 2009 19:49:35 -0800
From:	Mike Travis <travis@....com>
To:	Ingo Molnar <mingo@...e.hu>, Rusty Russell <rusty@...tcorp.com.au>
CC:	"H. Peter Anvin" <hpa@...or.com>,
	Thomas Gleixner <tglx@...utronix.de>,
	Linus Torvalds <torvalds@...ux-foundation.org>,
	Jack Steiner <steiner@....com>, Cliff Wickman <cpw@....com>,
	Nick Piggin <nickpiggin@...oo.com.au>,
	Jeremy Fitzhardinge <jeremy@...p.org>,
	Christoph Lameter <cl@...ux-foundation.org>,
	Jes Sorensen <jes@....com>, LKML <linux-kernel@...r.kernel.org>
Subject: Re: [PATCH 00/11] x86: cpumask: some more cpumask cleanups - flush_tlb_*

Ingo Molnar wrote:
...
>> ====== Stack (-l 500)
>>     1 - allyesconfig-128
>>     2 - allyesconfig-4k
>>
>>   .1.    .2.    ..final..
>>     0  +1032   1032      .  flush_tlb_page
>>     0  +1024   1024      .  kvm_reload_remote_mmus
>>     0  +1024   1024      .  kvm_flush_remote_tlbs
>>     0  +1024   1024      .  flush_tlb_mm
>>     0  +1024   1024      .  flush_tlb_current_task
> 
> Quite good! Can we fix those TLB flush cpumask uses too?

Here is one proposal.  I don't like increasing the PER_CPU area, but eventually
we should be able to per_cpu_alloc the cpumasks so they only take up as much
room as needed.  Also, it was unclear whether one scratch cpumask would suffice
for all three flush_tlb_* functions or whether a separate one for each was
needed.  I went the safe route and used one per function.
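
For reference, the per-cpu scratch pattern each of them ends up using is
roughly this (names here are illustrative, the real ones are in the patch
below); get_cpu_var() disables preemption, which is what makes the per-cpu
mask safe to scribble on:

	static DEFINE_PER_CPU(cpumask_t, scratch_cpumask);

	void example_flush(struct mm_struct *mm)
	{
		/* get_cpu_var() disables preemption, so this cpu owns
		 * the scratch mask until put_cpu_var() below. */
		struct cpumask *mask = &get_cpu_var(scratch_cpumask);

		cpumask_copy(mask, &mm->cpu_vm_mask);
		cpumask_clear_cpu(smp_processor_id(), mask);

		if (!cpumask_empty(mask))
			flush_tlb_others(mask, mm, TLB_FLUSH_ALL);

		put_cpu_var(scratch_cpumask);
	}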

An alternate approach might be to add a scratch cpumask to the smp_flush_state
struct.  It already has one fixed cpumask_t (flush_cpumask), so the whole
struct should be per_cpu_alloc'd.  (And since it's cacheline_aligned, a
per_cpu_alloc_aligned() will probably be needed?)
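
Just a sketch of that idea, with the existing fields abbreviated and
scratch_cpumask being the hypothetical addition:

	union smp_flush_state {
		struct {
			struct mm_struct *flush_mm;
			unsigned long flush_va;
			spinlock_t tlbstate_lock;
			cpumask_t flush_cpumask;	/* already there */
			cpumask_t scratch_cpumask;	/* the addition */
		};
		char pad[SMP_CACHE_BYTES];
	} ____cacheline_aligned;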

(Btw, only the 64-bit code is shown here; 32-bit needs similar changes.)

Thanks,
Mike

---
Subject: cpumask: remove cpumask_t from stack for flush_tlb_xxx

Remove the scratch cpumask_t from the stack for the flush_tlb_*
functions.  Since preemption is disabled while the mask is in use, we
can use a one-per-cpu scratch cpumask instead.  "Un-const" the cpumask
pointer arg in native_flush_tlb_others, as one of the flush_tlb_others
implementations (uv) modifies the cpumask.  I believe this is safe,
since the mask that reaches it is now a per-cpu scratch copy rather
than mm->cpu_vm_mask itself.

Signed-off-by: Mike Travis <travis@....com>
---
 arch/x86/include/asm/paravirt.h  |    8 ++---
 arch/x86/include/asm/tlbflush.h  |    4 +-
 arch/x86/include/asm/uv/uv_bau.h |    3 +-
 arch/x86/kernel/tlb_64.c         |   53 +++++++++++++++++++++------------------
 arch/x86/kernel/tlb_uv.c         |   12 ++++----
 5 files changed, 43 insertions(+), 37 deletions(-)

--- linux-2.6-for-ingo.orig/arch/x86/include/asm/paravirt.h
+++ linux-2.6-for-ingo/arch/x86/include/asm/paravirt.h
@@ -244,7 +244,7 @@ struct pv_mmu_ops {
 	void (*flush_tlb_user)(void);
 	void (*flush_tlb_kernel)(void);
 	void (*flush_tlb_single)(unsigned long addr);
-	void (*flush_tlb_others)(const cpumask_t *cpus, struct mm_struct *mm,
+	void (*flush_tlb_others)(struct cpumask *cpus, struct mm_struct *mm,
 				 unsigned long va);
 
 	/* Hooks for allocating and freeing a pagetable top-level */
@@ -984,10 +984,10 @@ static inline void __flush_tlb_single(un
 	PVOP_VCALL1(pv_mmu_ops.flush_tlb_single, addr);
 }
 
-static inline void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
-				    unsigned long va)
+static inline void flush_tlb_others(struct cpumask *cpumask,
+				    struct mm_struct *mm, unsigned long va)
 {
-	PVOP_VCALL3(pv_mmu_ops.flush_tlb_others, &cpumask, mm, va);
+	PVOP_VCALL3(pv_mmu_ops.flush_tlb_others, cpumask, mm, va);
 }
 
 static inline int paravirt_pgd_alloc(struct mm_struct *mm)
--- linux-2.6-for-ingo.orig/arch/x86/include/asm/tlbflush.h
+++ linux-2.6-for-ingo/arch/x86/include/asm/tlbflush.h
@@ -142,7 +142,7 @@ static inline void flush_tlb_range(struc
 	flush_tlb_mm(vma->vm_mm);
 }
 
-void native_flush_tlb_others(const cpumask_t *cpumask, struct mm_struct *mm,
+void native_flush_tlb_others(struct cpumask *cpumask, struct mm_struct *mm,
 			     unsigned long va);
 
 #define TLBSTATE_OK	1
@@ -166,7 +166,7 @@ static inline void reset_lazy_tlbstate(v
 #endif	/* SMP */
 
 #ifndef CONFIG_PARAVIRT
-#define flush_tlb_others(mask, mm, va)	native_flush_tlb_others(&mask, mm, va)
+#define flush_tlb_others(mask, mm, va)	native_flush_tlb_others(mask, mm, va)
 #endif
 
 static inline void flush_tlb_kernel_range(unsigned long start,
--- linux-2.6-for-ingo.orig/arch/x86/include/asm/uv/uv_bau.h
+++ linux-2.6-for-ingo/arch/x86/include/asm/uv/uv_bau.h
@@ -325,7 +325,8 @@ static inline void bau_cpubits_clear(str
 #define cpubit_isset(cpu, bau_local_cpumask) \
 	test_bit((cpu), (bau_local_cpumask).bits)
 
-extern int uv_flush_tlb_others(cpumask_t *, struct mm_struct *, unsigned long);
+extern int uv_flush_tlb_others(struct cpumask *, struct mm_struct *,
+			       unsigned long);
 extern void uv_bau_message_intr1(void);
 extern void uv_bau_timeout_intr1(void);
 
--- linux-2.6-for-ingo.orig/arch/x86/kernel/tlb_64.c
+++ linux-2.6-for-ingo/arch/x86/kernel/tlb_64.c
@@ -157,14 +157,13 @@ out:
 	inc_irq_stat(irq_tlb_count);
 }
 
-void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
+void native_flush_tlb_others(struct cpumask *cpumask, struct mm_struct *mm,
 			     unsigned long va)
 {
 	int sender;
 	union smp_flush_state *f;
-	cpumask_t cpumask = *cpumaskp;
 
-	if (is_uv_system() && uv_flush_tlb_others(&cpumask, mm, va))
+	if (is_uv_system() && uv_flush_tlb_others(cpumask, mm, va))
 		return;
 
 	/* Caller has disabled preemption */
@@ -180,7 +179,7 @@ void native_flush_tlb_others(const cpuma
 
 	f->flush_mm = mm;
 	f->flush_va = va;
-	cpus_or(f->flush_cpumask, cpumask, f->flush_cpumask);
+	cpumask_or(&f->flush_cpumask, cpumask, &f->flush_cpumask);
 
 	/*
 	 * Make the above memory operations globally visible before
@@ -191,9 +190,9 @@ void native_flush_tlb_others(const cpuma
 	 * We have to send the IPI only to
 	 * CPUs affected.
 	 */
-	send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR_START + sender);
+	send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR_START + sender);
 
-	while (!cpus_empty(f->flush_cpumask))
+	while (!cpumask_empty(&f->flush_cpumask))
 		cpu_relax();
 
 	f->flush_mm = NULL;
@@ -212,28 +211,32 @@ static int __cpuinit init_smp_flush(void
 }
 core_initcall(init_smp_flush);
 
+static DEFINE_PER_CPU(cpumask_t, flush_tlb_task_cpumask);
+
 void flush_tlb_current_task(void)
 {
 	struct mm_struct *mm = current->mm;
-	cpumask_t cpu_mask;
+	struct cpumask *cpu_mask;
 
-	preempt_disable();
-	cpu_mask = mm->cpu_vm_mask;
-	cpu_clear(smp_processor_id(), cpu_mask);
+	cpu_mask = &get_cpu_var(flush_tlb_task_cpumask);
+	cpumask_copy(cpu_mask, &mm->cpu_vm_mask);
+	cpumask_clear_cpu(smp_processor_id(), cpu_mask);
 
 	local_flush_tlb();
-	if (!cpus_empty(cpu_mask))
+	if (!cpumask_empty(cpu_mask))
 		flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
-	preempt_enable();
+	put_cpu_var(flush_tlb_task_cpumask);
 }
 
+static DEFINE_PER_CPU(cpumask_t, flush_tlb_mm_cpumask);
+
 void flush_tlb_mm(struct mm_struct *mm)
 {
-	cpumask_t cpu_mask;
+	struct cpumask *cpu_mask;
 
-	preempt_disable();
-	cpu_mask = mm->cpu_vm_mask;
-	cpu_clear(smp_processor_id(), cpu_mask);
+	cpu_mask = &get_cpu_var(flush_tlb_mm_cpumask);
+	cpumask_copy(cpu_mask, &mm->cpu_vm_mask);
+	cpumask_clear_cpu(smp_processor_id(), cpu_mask);
 
 	if (current->active_mm == mm) {
 		if (current->mm)
@@ -241,20 +244,22 @@ void flush_tlb_mm(struct mm_struct *mm)
 		else
 			leave_mm(smp_processor_id());
 	}
-	if (!cpus_empty(cpu_mask))
+	if (!cpumask_empty(cpu_mask))
 		flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
 
-	preempt_enable();
+	put_cpu_var(flush_tlb_mm_cpumask);
 }
 
+static DEFINE_PER_CPU(cpumask_t, flush_tlb_page_cpumask);
+
 void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
 {
 	struct mm_struct *mm = vma->vm_mm;
-	cpumask_t cpu_mask;
+	struct cpumask *cpu_mask;
 
-	preempt_disable();
-	cpu_mask = mm->cpu_vm_mask;
-	cpu_clear(smp_processor_id(), cpu_mask);
+	cpu_mask = &get_cpu_var(flush_tlb_page_cpumask);
+	cpumask_copy(cpu_mask, &mm->cpu_vm_mask);
+	cpumask_clear_cpu(smp_processor_id(), cpu_mask);
 
 	if (current->active_mm == mm) {
 		if (current->mm)
@@ -263,10 +268,10 @@ void flush_tlb_page(struct vm_area_struc
 			leave_mm(smp_processor_id());
 	}
 
-	if (!cpus_empty(cpu_mask))
+	if (!cpumask_empty(cpu_mask))
 		flush_tlb_others(cpu_mask, mm, va);
 
-	preempt_enable();
+	put_cpu_var(flush_tlb_page_cpumask);
 }
 
 static void do_flush_tlb_all(void *info)
--- linux-2.6-for-ingo.orig/arch/x86/kernel/tlb_uv.c
+++ linux-2.6-for-ingo/arch/x86/kernel/tlb_uv.c
@@ -216,7 +216,7 @@ static int uv_wait_completion(struct bau
  * unchanged.
  */
 int uv_flush_send_and_wait(int cpu, int this_blade, struct bau_desc *bau_desc,
-			   cpumask_t *cpumaskp)
+			   struct cpumask *cpumaskp)
 {
 	int completion_status = 0;
 	int right_shift;
@@ -263,13 +263,13 @@ int uv_flush_send_and_wait(int cpu, int 
 	 * Success, so clear the remote cpu's from the mask so we don't
 	 * use the IPI method of shootdown on them.
 	 */
-	for_each_cpu_mask(bit, *cpumaskp) {
+	for_each_cpu(bit, cpumaskp) {
 		blade = uv_cpu_to_blade_id(bit);
 		if (blade == this_blade)
 			continue;
-		cpu_clear(bit, *cpumaskp);
+		cpumask_clear_cpu(bit, cpumaskp);
 	}
-	if (!cpus_empty(*cpumaskp))
+	if (!cpumask_empty(cpumaskp))
 		return 0;
 	return 1;
 }
@@ -296,7 +296,7 @@ int uv_flush_send_and_wait(int cpu, int 
  * Returns 1 if all remote flushing was done.
  * Returns 0 if some remote flushing remains to be done.
  */
-int uv_flush_tlb_others(cpumask_t *cpumaskp, struct mm_struct *mm,
+int uv_flush_tlb_others(struct cpumask *cpumaskp, struct mm_struct *mm,
 			unsigned long va)
 {
 	int i;
@@ -315,7 +315,7 @@ int uv_flush_tlb_others(cpumask_t *cpuma
 	bau_nodes_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE);
 
 	i = 0;
-	for_each_cpu_mask(bit, *cpumaskp) {
+	for_each_cpu(bit, cpumaskp) {
 		blade = uv_cpu_to_blade_id(bit);
 		BUG_ON(blade > (UV_DISTRIBUTION_SIZE - 1));
 		if (blade == this_blade) {
--
