[<prev] [next>] [day] [month] [year] [list]
Message-ID: <20150414120532.GA23865@gmail.com>
Date: Tue, 14 Apr 2015 14:05:32 +0200
From: Ingo Molnar <mingo@...nel.org>
To: Linus Torvalds <torvalds@...ux-foundation.org>
Cc: linux-kernel@...r.kernel.org,
Frédéric Weisbecker <fweisbec@...il.com>,
Thomas Gleixner <tglx@...utronix.de>,
Peter Zijlstra <a.p.zijlstra@...llo.nl>,
Andrew Morton <akpm@...ux-foundation.org>
Subject: [GIT PULL] NOHZ changes for v4.1
Linus,
Please pull the latest timers-nohz-for-linus git tree from:
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git timers-nohz-for-linus
# HEAD: 1524b745406a85ba201cb25df72110c1ccac0f72 Merge branch 'nohz/guest' of git://git.kernel.org/pub/scm/linux/kernel/git/frederic/linux-dynticks into timers/nohz
This tree adds full dynticks support to KVM guests (support the
disabling of the timer tick on the guest): the main missing piece was
the recognition of guest execution as RCU extended quiescent state and
related changes.
Thanks,
Ingo
------------------>
Frederic Weisbecker (2):
ppc: Remove unused cpp symbols in kvm headers
context_tracking: Rename context symbols to prepare for transition state
Rik van Riel (5):
context_tracking: Generalize context tracking APIs to support user and guest
context_tracking: Add stub context_tracking_is_enabled
context_tracking: Run vtime_user_enter/exit only when state == CONTEXT_USER
context_tracking: Export context_tracking_user_enter/exit
kvm,rcu,nohz: use RCU extended quiescent state when running KVM guest
arch/powerpc/include/asm/kvm_book3s.h | 4 ---
arch/x86/kernel/traps.c | 2 +-
include/linux/context_tracking.h | 15 +++++++--
include/linux/context_tracking_state.h | 9 ++++--
include/linux/kvm_host.h | 3 +-
kernel/context_tracking.c | 59 ++++++++++++++++++++++------------
kernel/sched/core.c | 2 +-
7 files changed, 61 insertions(+), 33 deletions(-)
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 942c7b1678e3..993090422690 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -106,10 +106,6 @@ struct kvmppc_vcpu_book3s {
spinlock_t mmu_lock;
};
-#define CONTEXT_HOST 0
-#define CONTEXT_GUEST 1
-#define CONTEXT_GUEST_END 2
-
#define VSID_REAL 0x07ffffffffc00000ULL
#define VSID_BAT 0x07ffffffffb00000ULL
#define VSID_64K 0x0800000000000000ULL
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 9d2073e2ecc9..756f74eed35d 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -123,7 +123,7 @@ enum ctx_state ist_enter(struct pt_regs *regs)
* but we need to notify RCU.
*/
rcu_nmi_enter();
- prev_state = IN_KERNEL; /* the value is irrelevant. */
+ prev_state = CONTEXT_KERNEL; /* the value is irrelevant. */
}
/*
diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h
index 37b81bd51ec0..2821838256b4 100644
--- a/include/linux/context_tracking.h
+++ b/include/linux/context_tracking.h
@@ -10,6 +10,8 @@
#ifdef CONFIG_CONTEXT_TRACKING
extern void context_tracking_cpu_set(int cpu);
+extern void context_tracking_enter(enum ctx_state state);
+extern void context_tracking_exit(enum ctx_state state);
extern void context_tracking_user_enter(void);
extern void context_tracking_user_exit(void);
extern void __context_tracking_task_switch(struct task_struct *prev,
@@ -35,7 +37,8 @@ static inline enum ctx_state exception_enter(void)
return 0;
prev_ctx = this_cpu_read(context_tracking.state);
- context_tracking_user_exit();
+ if (prev_ctx != CONTEXT_KERNEL)
+ context_tracking_exit(prev_ctx);
return prev_ctx;
}
@@ -43,8 +46,8 @@ static inline enum ctx_state exception_enter(void)
static inline void exception_exit(enum ctx_state prev_ctx)
{
if (context_tracking_is_enabled()) {
- if (prev_ctx == IN_USER)
- context_tracking_user_enter();
+ if (prev_ctx != CONTEXT_KERNEL)
+ context_tracking_enter(prev_ctx);
}
}
@@ -78,10 +81,16 @@ static inline void guest_enter(void)
vtime_guest_enter(current);
else
current->flags |= PF_VCPU;
+
+ if (context_tracking_is_enabled())
+ context_tracking_enter(CONTEXT_GUEST);
}
static inline void guest_exit(void)
{
+ if (context_tracking_is_enabled())
+ context_tracking_exit(CONTEXT_GUEST);
+
if (vtime_accounting_enabled())
vtime_guest_exit(current);
else
diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h
index 97a81225d037..6b7b96a32b75 100644
--- a/include/linux/context_tracking_state.h
+++ b/include/linux/context_tracking_state.h
@@ -13,8 +13,9 @@ struct context_tracking {
*/
bool active;
enum ctx_state {
- IN_KERNEL = 0,
- IN_USER,
+ CONTEXT_KERNEL = 0,
+ CONTEXT_USER,
+ CONTEXT_GUEST,
} state;
};
@@ -34,11 +35,13 @@ static inline bool context_tracking_cpu_is_enabled(void)
static inline bool context_tracking_in_user(void)
{
- return __this_cpu_read(context_tracking.state) == IN_USER;
+ return __this_cpu_read(context_tracking.state) == CONTEXT_USER;
}
#else
static inline bool context_tracking_in_user(void) { return false; }
static inline bool context_tracking_active(void) { return false; }
+static inline bool context_tracking_is_enabled(void) { return false; }
+static inline bool context_tracking_cpu_is_enabled(void) { return false; }
#endif /* CONFIG_CONTEXT_TRACKING */
#endif
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index d12b2104d19b..cc8c61c5459c 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -766,7 +766,8 @@ static inline void kvm_guest_enter(void)
* one time slice). Lets treat guest mode as quiescent state, just like
* we do with user-mode execution.
*/
- rcu_virt_note_context_switch(smp_processor_id());
+ if (!context_tracking_cpu_is_enabled())
+ rcu_virt_note_context_switch(smp_processor_id());
}
static inline void kvm_guest_exit(void)
diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c
index 937ecdfdf258..72d59a1a6eb6 100644
--- a/kernel/context_tracking.c
+++ b/kernel/context_tracking.c
@@ -39,15 +39,15 @@ void context_tracking_cpu_set(int cpu)
}
/**
- * context_tracking_user_enter - Inform the context tracking that the CPU is going to
- * enter userspace mode.
+ * context_tracking_enter - Inform the context tracking that the CPU is going
+ * enter user or guest space mode.
*
* This function must be called right before we switch from the kernel
- * to userspace, when it's guaranteed the remaining kernel instructions
- * to execute won't use any RCU read side critical section because this
- * function sets RCU in extended quiescent state.
+ * to user or guest space, when it's guaranteed the remaining kernel
+ * instructions to execute won't use any RCU read side critical section
+ * because this function sets RCU in extended quiescent state.
*/
-void context_tracking_user_enter(void)
+void context_tracking_enter(enum ctx_state state)
{
unsigned long flags;
@@ -75,9 +75,8 @@ void context_tracking_user_enter(void)
WARN_ON_ONCE(!current->mm);
local_irq_save(flags);
- if ( __this_cpu_read(context_tracking.state) != IN_USER) {
+ if ( __this_cpu_read(context_tracking.state) != state) {
if (__this_cpu_read(context_tracking.active)) {
- trace_user_enter(0);
/*
* At this stage, only low level arch entry code remains and
* then we'll run in userspace. We can assume there won't be
@@ -85,7 +84,10 @@ void context_tracking_user_enter(void)
* user_exit() or rcu_irq_enter(). Let's remove RCU's dependency
* on the tick.
*/
- vtime_user_enter(current);
+ if (state == CONTEXT_USER) {
+ trace_user_enter(0);
+ vtime_user_enter(current);
+ }
rcu_user_enter();
}
/*
@@ -101,24 +103,32 @@ void context_tracking_user_enter(void)
* OTOH we can spare the calls to vtime and RCU when context_tracking.active
* is false because we know that CPU is not tickless.
*/
- __this_cpu_write(context_tracking.state, IN_USER);
+ __this_cpu_write(context_tracking.state, state);
}
local_irq_restore(flags);
}
+NOKPROBE_SYMBOL(context_tracking_enter);
+EXPORT_SYMBOL_GPL(context_tracking_enter);
+
+void context_tracking_user_enter(void)
+{
+ context_tracking_enter(CONTEXT_USER);
+}
NOKPROBE_SYMBOL(context_tracking_user_enter);
/**
- * context_tracking_user_exit - Inform the context tracking that the CPU is
- * exiting userspace mode and entering the kernel.
+ * context_tracking_exit - Inform the context tracking that the CPU is
+ * exiting user or guest mode and entering the kernel.
*
- * This function must be called after we entered the kernel from userspace
- * before any use of RCU read side critical section. This potentially include
- * any high level kernel code like syscalls, exceptions, signal handling, etc...
+ * This function must be called after we entered the kernel from user or
+ * guest space before any use of RCU read side critical section. This
+ * potentially include any high level kernel code like syscalls, exceptions,
+ * signal handling, etc...
*
* This call supports re-entrancy. This way it can be called from any exception
* handler without needing to know if we came from userspace or not.
*/
-void context_tracking_user_exit(void)
+void context_tracking_exit(enum ctx_state state)
{
unsigned long flags;
@@ -129,20 +139,29 @@ void context_tracking_user_exit(void)
return;
local_irq_save(flags);
- if (__this_cpu_read(context_tracking.state) == IN_USER) {
+ if (__this_cpu_read(context_tracking.state) == state) {
if (__this_cpu_read(context_tracking.active)) {
/*
* We are going to run code that may use RCU. Inform
* RCU core about that (ie: we may need the tick again).
*/
rcu_user_exit();
- vtime_user_exit(current);
- trace_user_exit(0);
+ if (state == CONTEXT_USER) {
+ vtime_user_exit(current);
+ trace_user_exit(0);
+ }
}
- __this_cpu_write(context_tracking.state, IN_KERNEL);
+ __this_cpu_write(context_tracking.state, CONTEXT_KERNEL);
}
local_irq_restore(flags);
}
+NOKPROBE_SYMBOL(context_tracking_exit);
+EXPORT_SYMBOL_GPL(context_tracking_exit);
+
+void context_tracking_user_exit(void)
+{
+ context_tracking_exit(CONTEXT_USER);
+}
NOKPROBE_SYMBOL(context_tracking_user_exit);
/**
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index f0f831e8a345..06b9a00871e0 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2818,7 +2818,7 @@ asmlinkage __visible void __sched schedule_user(void)
* we find a better solution.
*
* NB: There are buggy callers of this function. Ideally we
- * should warn if prev_state != IN_USER, but that will trigger
+ * should warn if prev_state != CONTEXT_USER, but that will trigger
* too frequently to make sense yet.
*/
enum ctx_state prev_state = exception_enter();
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists