Message-ID: <20140213201618.GD14089@laptop.programming.kicks-ass.net>
Date: Thu, 13 Feb 2014 21:16:18 +0100
From: Peter Zijlstra <peterz@...radead.org>
To: Andy Lutomirski <luto@...capital.net>
Cc: Thomas Gleixner <tglx@...utronix.de>,
Mike Galbraith <bitbucket@...ine.de>, X86 ML <x86@...nel.org>,
"linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>
Subject: Re: [RFC] sched: Add a new lockless wake-from-idle implementation
On Thu, Feb 13, 2014 at 09:07:10AM -0800, Andy Lutomirski wrote:
> > I also don't really like how the polling state is an atomic; it's a
> > CPU-local property.
>
> Your patch also makes polling state be an atomic (albeit one that
> isn't changed remotely).
Yeah, sorry about that; I changed the email (and the code) a number of
times before posting :/
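To illustrate the cpu-local point, the distinction is roughly the
following; a toy sketch in plain C11, with a thread-local standing in
for a real per-CPU variable, and none of these names being the actual
kernel interfaces:

/* Toy sketch: polling state as an atomic bit in a shared flags word
 * versus a plain CPU-local word.  All names here are made up. */
#include <stdatomic.h>

static _Atomic unsigned long thread_flags;      /* shared, remote CPUs poke it */
#define TIF_POLLING (1UL << 21)                 /* bit position illustrative */

static void set_polling_shared(void)
{
        atomic_fetch_or(&thread_flags, TIF_POLLING); /* locked RMW every time */
}

static _Thread_local unsigned int polling;      /* stand-in for a per-CPU var */

static void set_polling_local(void)
{
        polling = 1;                            /* plain store, no atomic op */
}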
> On the subject of major surgery, though: there are very few places in
> the kernel where TIF_NEED_RESCHED gets set. With something like my
> patch applied, I think that there is no code at all that sets any
> other task's TIF_NEED_RESCHED. That suggests that all
> set_tsk_need_resched callers could just call into the scheduler
> directly.
One of the main callers would be the timer tick driving local
preemption; that runs in interrupt context, where we can't call
schedule(), so the reschedule really has to happen on the interrupt
return path.
> If so, the change could probably delete a whole lot of
> assembly code, and every kernel exit would get faster.
We already need to load that word for all kinds of other things, like
delivering signals, so testing the one extra bit in the return path is
basically free.
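Spelled out, the flow is roughly this; a stand-alone sketch whose names
are simplified stand-ins for the real entry/exit code:

/* Stand-alone sketch of the flow described above; simplified names,
 * not the real kernel entry code. */
#define TIF_SIGPENDING   (1u << 0)              /* bit layout illustrative */
#define TIF_NEED_RESCHED (1u << 1)

static volatile unsigned int ti_flags;          /* models thread_info->flags */

static void do_signal(void) { /* deliver pending signals; elided */ }
static void schedule(void)  { /* pick the next task; elided */ }

static void timer_tick(void)                    /* interrupt context */
{
        ti_flags |= TIF_NEED_RESCHED;           /* may only mark the need */
}

static void interrupt_return(void)              /* interrupt exit path */
{
        unsigned int flags = ti_flags;          /* one load, needed anyway */

        if (flags & TIF_SIGPENDING)             /* e.g. signal delivery... */
                do_signal();
        if (flags & TIF_NEED_RESCHED) {         /* ...so this bit test is free */
                ti_flags &= ~TIF_NEED_RESCHED;
                schedule();                     /* safe: out of irq context */
        }
}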
Anyway, after all this mucking about I finally remembered that Venki
once attempted something similar:
https://lkml.org/lkml/2012/2/6/357
How about something like this?
---
 arch/x86/include/asm/mwait.h | 33 ++++++++++++++++--------
 arch/x86/kernel/process.c    |  2 ++
 arch/x86/kernel/smp.c        | 61 ++++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 83 insertions(+), 13 deletions(-)
diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
index 1da25a5f96f9..cb7bb8bb6617 100644
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -1,6 +1,7 @@
 #ifndef _ASM_X86_MWAIT_H
 #define _ASM_X86_MWAIT_H
 
+#include <linux/percpu.h>
 #include <linux/sched.h>
 
 #define MWAIT_SUBSTATE_MASK		0xf
@@ -15,6 +16,14 @@
 
 #define MWAIT_ECX_INTERRUPT_BREAK	0x1
 
+#define MWAIT_IPI_ENABLED	0x01
+#define MWAIT_IPI_RESCHED	0x02
+#define MWAIT_IPI_SINGLE	0x04
+
+extern void mwait_intercept_handler(void);
+
+DECLARE_PER_CPU_ALIGNED(unsigned int, mwait_ipi);
+
 static inline void __monitor(const void *eax, unsigned long ecx,
			     unsigned long edx)
 {
@@ -42,18 +51,20 @@ static inline void __mwait(unsigned long eax, unsigned long ecx)
  */
 static inline void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
 {
-	if (!current_set_polling_and_test()) {
-		if (static_cpu_has(X86_FEATURE_CLFLUSH_MONITOR)) {
-			mb();
-			clflush((void *)&current_thread_info()->flags);
-			mb();
-		}
-
-		__monitor((void *)&current_thread_info()->flags, 0, 0);
-		if (!need_resched())
-			__mwait(eax, ecx);
+	unsigned int *ptr = this_cpu_ptr(&mwait_ipi);
+	unsigned int old = xchg(ptr, MWAIT_IPI_ENABLED);
+
+	WARN_ON_ONCE(old);
+
+	if (static_cpu_has(X86_FEATURE_CLFLUSH_MONITOR)) {
+		mb();
+		clflush((void *)ptr);
+		mb();
 	}
-	current_clr_polling();
+
+	__monitor((void *)ptr, 0, 0);
+	if (!(*ptr & ~MWAIT_IPI_ENABLED))
+		__mwait(eax, ecx);
 }
 
 #endif /* _ASM_X86_MWAIT_H */
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 4505e2a950d8..00afb2b676b8 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -28,6 +28,7 @@
 #include <asm/fpu-internal.h>
 #include <asm/debugreg.h>
 #include <asm/nmi.h>
+#include <asm/mwait.h>
 
 /*
  * per-CPU TSS segments. Threads are completely 'soft' on Linux,
@@ -286,6 +287,7 @@
 void arch_cpu_idle_exit(void)
 {
 	__exit_idle();
+	mwait_intercept_handler();
 }
 
 void arch_cpu_idle_dead(void)
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 7c3a5a61f2e4..4b078a8d6b83 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -23,6 +23,8 @@
 #include <linux/interrupt.h>
 #include <linux/cpu.h>
 #include <linux/gfp.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
 
 #include <asm/mtrr.h>
 #include <asm/tlbflush.h>
@@ -31,6 +33,8 @@
 #include <asm/apic.h>
 #include <asm/nmi.h>
 #include <asm/trace/irq_vectors.h>
+#include <asm/mwait.h>
+
 /*
  * Some notes on x86 processor bugs affecting SMP operation:
  *
@@ -113,6 +117,56 @@
 static atomic_t stopping_cpu = ATOMIC_INIT(-1);
 static bool smp_no_nmi_ipi = false;
 
+DEFINE_PER_CPU_ALIGNED(unsigned int, mwait_ipi);
+EXPORT_PER_CPU_SYMBOL_GPL(mwait_ipi);
+
+static bool mwait_intercept(int cpu, int ipi)
+{
+	u32 *ptr = &per_cpu(mwait_ipi, cpu);
+	u32 val, new, old;
+
+	if (!static_cpu_has(X86_FEATURE_MWAIT))
+		return false;
+
+	val = *ptr;
+	if (!(val & MWAIT_IPI_ENABLED))
+		return false;
+
+	for (;;) {
+		new = val | ipi;
+		old = cmpxchg(ptr, val, new);
+		if (old == val)
+			break;
+		val = old;
+	}
+
+	if (!(old & MWAIT_IPI_ENABLED))
+		return false;
+
+	return true;
+}
+
+void mwait_intercept_handler(void)
+{
+	unsigned int val, *ptr;
+
+	if (!static_cpu_has(X86_FEATURE_MWAIT))
+		return;
+
+	ptr = this_cpu_ptr(&mwait_ipi);
+	val = xchg(ptr, 0);
+
+	if (!(val & ~MWAIT_IPI_ENABLED))
+		return;
+
+	local_irq_disable();
+	if (val & MWAIT_IPI_RESCHED)
+		scheduler_ipi();
+	if (val & MWAIT_IPI_SINGLE)
+		generic_smp_call_function_single_interrupt();
+	local_irq_enable();
+}
+
 /*
  * this function sends a 'reschedule' IPI to another CPU.
  * it goes straight through and wastes no time serializing
@@ -124,12 +178,15 @@ static void native_smp_send_reschedule(int cpu)
 		WARN_ON(1);
 		return;
 	}
-	apic->send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR);
+
+	if (!mwait_intercept(cpu, MWAIT_IPI_RESCHED))
+		apic->send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR);
 }
 
 void native_send_call_func_single_ipi(int cpu)
 {
-	apic->send_IPI_mask(cpumask_of(cpu), CALL_FUNCTION_SINGLE_VECTOR);
+	if (!mwait_intercept(cpu, MWAIT_IPI_SINGLE))
+		apic->send_IPI_mask(cpumask_of(cpu), CALL_FUNCTION_SINGLE_VECTOR);
 }
 
 void native_send_call_func_ipi(const struct cpumask *mask)
--
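For anyone skimming the diff: the waker/idler handshake it implements
reduces to something like the following; a stand-alone model in C11
atomics, with monitor/mwait and the per-CPU machinery elided, so not
the kernel code itself:

/* Stand-alone model of the wake handshake in the patch above; the
 * flag values mirror the patch, everything else is simplification. */
#include <stdatomic.h>
#include <stdbool.h>

#define MWAIT_IPI_ENABLED 0x01
#define MWAIT_IPI_RESCHED 0x02
#define MWAIT_IPI_SINGLE  0x04

static _Atomic unsigned int mwait_ipi;  /* one word per CPU in the patch */

static void scheduler_work(void)     { /* scheduler_ipi() in the patch */ }
static void call_function_work(void) { /* generic_smp_call_function_single_interrupt() */ }

/* Waker side, as in mwait_intercept(): returns true if setting the IPI
 * bit in the monitored word is enough to wake the target, false if a
 * real vector IPI still has to be sent. */
static bool intercept(unsigned int ipi)
{
        unsigned int val = atomic_load(&mwait_ipi);

        do {
                if (!(val & MWAIT_IPI_ENABLED))
                        return false;   /* target not in mwait: send the IPI */
        } while (!atomic_compare_exchange_weak(&mwait_ipi, &val, val | ipi));

        return true;    /* the store to the monitored line wakes the CPU */
}

/* Idle-exit side, as in mwait_intercept_handler(): claim all pending
 * bits in one go and dispatch them. */
static void handle_pending(void)
{
        unsigned int val = atomic_exchange(&mwait_ipi, 0);

        if (val & MWAIT_IPI_RESCHED)
                scheduler_work();
        if (val & MWAIT_IPI_SINGLE)
                call_function_work();
}

The point being that MONITOR watches the same cacheline the waker
writes, so a successful cmpxchg doubles as the wakeup and the vector
IPI can be skipped entirely.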