[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <1484596275-30412-1-git-send-email-mathieu.desnoyers@efficios.com>
Date: Mon, 16 Jan 2017 14:51:15 -0500
From: Mathieu Desnoyers <mathieu.desnoyers@...icios.com>
To: "Paul E . McKenney" <paulmck@...ux.vnet.ibm.com>
Cc: linux-kernel@...r.kernel.org,
Mathieu Desnoyers <mathieu.desnoyers@...icios.com>,
Josh Triplett <josh@...htriplett.org>,
KOSAKI Motohiro <kosaki.motohiro@...fujitsu.com>,
Steven Rostedt <rostedt@...dmis.org>,
Nicholas Miell <nmiell@...cast.net>,
Ingo Molnar <mingo@...hat.com>,
Alan Cox <gnomes@...rguk.ukuu.org.uk>,
Lai Jiangshan <laijs@...fujitsu.com>,
Stephen Hemminger <stephen@...workplumber.org>,
Thomas Gleixner <tglx@...utronix.de>,
Peter Zijlstra <peterz@...radead.org>,
David Howells <dhowells@...hat.com>,
Pranith Kumar <bobby.prani@...il.com>,
Michael Kerrisk <mtk.manpages@...il.com>,
Shuah Khan <shuahkh@....samsung.com>,
Andrew Morton <akpm@...ux-foundation.org>,
Linus Torvalds <torvalds@...ux-foundation.org>
Subject: [RFC PATCH] membarrier: handle nohz_full with expedited thread registration
Threads running on nohz_full CPUs are not considered by
synchronize_sched, but they should be covered by a membarrier system
call with MEMBARRIER_CMD_SHARED command.
Introduce two new commands to membarrier:
MEMBARRIER_CMD_REGISTER_EXPEDITED and
MEMBARRIER_CMD_UNREGISTER_EXPEDITED.
Nohz_full threads that need to receive interrupts — to ensure correct
memory ordering when pairing compiler barriers with the membarrier
system call — should register as "expedited" threads.
[ This RFC patch lacks documentation. I mainly want feedback to see if
everyone is OK with the general approach. ]
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@...icios.com>
Cc: Paul E. McKenney <paulmck@...ux.vnet.ibm.com>
Cc: Josh Triplett <josh@...htriplett.org>
Cc: KOSAKI Motohiro <kosaki.motohiro@...fujitsu.com>
Cc: Steven Rostedt <rostedt@...dmis.org>
Cc: Nicholas Miell <nmiell@...cast.net>
Cc: Ingo Molnar <mingo@...hat.com>
Cc: Alan Cox <gnomes@...rguk.ukuu.org.uk>
Cc: Lai Jiangshan <laijs@...fujitsu.com>
Cc: Stephen Hemminger <stephen@...workplumber.org>
Cc: Thomas Gleixner <tglx@...utronix.de>
Cc: Peter Zijlstra <peterz@...radead.org>
Cc: David Howells <dhowells@...hat.com>
Cc: Pranith Kumar <bobby.prani@...il.com>
Cc: Michael Kerrisk <mtk.manpages@...il.com>
Cc: Shuah Khan <shuahkh@....samsung.com>
Cc: Andrew Morton <akpm@...ux-foundation.org>
Cc: Linus Torvalds <torvalds@...ux-foundation.org>
---
fs/exec.c | 1 +
include/linux/sched.h | 27 +++++++++++++++
include/uapi/linux/membarrier.h | 6 ++++
kernel/fork.c | 2 ++
kernel/membarrier.c | 77 +++++++++++++++++++++++++++++++++++++++--
5 files changed, 111 insertions(+), 2 deletions(-)
diff --git a/fs/exec.c b/fs/exec.c
index e579466..2cf1f87 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1771,6 +1771,7 @@ static int do_execveat_common(int fd, struct filename *filename,
/* execve succeeded */
current->fs->in_exec = 0;
current->in_execve = 0;
+ membarrier_execve(current);
acct_update_integrals(current);
task_numa_free(current);
free_bprm(bprm);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index ad3ec9e..1242eb9 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1998,6 +1998,9 @@ struct task_struct {
/* A live task holds one reference. */
atomic_t stack_refcount;
#endif
+#ifdef CONFIG_MEMBARRIER
+ unsigned int membarrier_expedited;
+#endif
/* CPU-specific state of this task */
struct thread_struct thread;
/*
@@ -3671,4 +3674,28 @@ void cpufreq_add_update_util_hook(int cpu, struct update_util_data *data,
void cpufreq_remove_update_util_hook(int cpu);
#endif /* CONFIG_CPU_FREQ */
+#ifdef CONFIG_MEMBARRIER
+static inline void membarrier_fork(struct task_struct *t,
+ unsigned long clone_flags)
+{
+ if (clone_flags & CLONE_THREAD)
+ t->membarrier_expedited = 0;
+ else
+ t->membarrier_expedited = current->membarrier_expedited;
+}
+
+static inline void membarrier_execve(struct task_struct *t)
+{
+ t->membarrier_expedited = 0;
+}
+#else
+static inline void membarrier_fork(struct task_struct *t,
+ unsigned long clone_flags)
+{
+}
+static inline void membarrier_execve(struct task_struct *t)
+{
+}
+#endif
+
#endif
diff --git a/include/uapi/linux/membarrier.h b/include/uapi/linux/membarrier.h
index e0b108b..4b78f07 100644
--- a/include/uapi/linux/membarrier.h
+++ b/include/uapi/linux/membarrier.h
@@ -40,6 +40,10 @@
* (non-running threads are de facto in such a
* state). This covers threads from all processes
* running on the system. This command returns 0.
+ * @MEMBARRIER_CMD_REGISTER_EXPEDITED:
+ *                          Register the calling thread as "expedited". A MEMBARRIER_CMD_SHARED issued by any process then also sends an IPI (memory barrier) to nohz_full CPUs currently running an expedited thread. Registrations nest; returns 0 on success, -EOVERFLOW if the registration count would overflow.
+ * @MEMBARRIER_CMD_UNREGISTER_EXPEDITED:
+ *                          Undo one prior MEMBARRIER_CMD_REGISTER_EXPEDITED registration by the calling thread. Returns 0 on success, -ENOENT if the thread is not currently registered.
*
* Command to be passed to the membarrier system call. The commands need to
* be a single bit each, except for MEMBARRIER_CMD_QUERY which is assigned to
@@ -48,6 +52,8 @@
enum membarrier_cmd {
MEMBARRIER_CMD_QUERY = 0,
MEMBARRIER_CMD_SHARED = (1 << 0),
+ MEMBARRIER_CMD_REGISTER_EXPEDITED = (1 << 1),
+ MEMBARRIER_CMD_UNREGISTER_EXPEDITED = (1 << 2),
};
#endif /* _UAPI_LINUX_MEMBARRIER_H */
diff --git a/kernel/fork.c b/kernel/fork.c
index 11c5c8a..cec23e0 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1769,6 +1769,8 @@ static __latent_entropy struct task_struct *copy_process(
*/
copy_seccomp(p);
+ membarrier_fork(p, clone_flags);
+
/*
* Process group and session signals need to be delivered to just the
* parent before the fork or both the parent and the child after the
diff --git a/kernel/membarrier.c b/kernel/membarrier.c
index 536c727..65a6fbf 100644
--- a/kernel/membarrier.c
+++ b/kernel/membarrier.c
@@ -16,12 +16,79 @@
#include <linux/syscalls.h>
#include <linux/membarrier.h>
+#include <linux/tick.h>
+#include <linux/smp.h>
+#include <linux/sched.h>
+
+/*
+ * TODO: private sched.h is needed for runqueue. Should we move the
+ * sched code under kernel/sched/ ?
+ */
+#include "sched/sched.h"
/*
* Bitmask made from a "or" of all commands within enum membarrier_cmd,
* except MEMBARRIER_CMD_QUERY.
*/
-#define MEMBARRIER_CMD_BITMASK (MEMBARRIER_CMD_SHARED)
+#define MEMBARRIER_CMD_BITMASK \
+ (MEMBARRIER_CMD_SHARED \
+ | MEMBARRIER_CMD_REGISTER_EXPEDITED \
+ | MEMBARRIER_CMD_UNREGISTER_EXPEDITED)
+
+static int membarrier_register_expedited(struct task_struct *t)
+{
+ struct rq *rq;
+
+ if (t->membarrier_expedited == UINT_MAX)
+ return -EOVERFLOW;
+ rq = this_rq();
+ raw_spin_lock(&rq->lock);
+ t->membarrier_expedited++;
+ raw_spin_unlock(&rq->lock);
+ return 0;
+}
+
+static int membarrier_unregister_expedited(struct task_struct *t)
+{
+ struct rq *rq;
+
+ if (!t->membarrier_expedited)
+ return -ENOENT;
+ rq = this_rq();
+ raw_spin_lock(&rq->lock);
+ t->membarrier_expedited--;
+ raw_spin_unlock(&rq->lock);
+ return 0;
+}
+
+static void memory_barrier(void *info)
+{
+ smp_mb();
+}
+
+static void membarrier_nohz_full_expedited(void)
+{
+ int cpu;
+
+ if (!tick_nohz_full_enabled())
+ return;
+ for_each_cpu(cpu, tick_nohz_full_mask) {
+ struct rq *rq;
+ struct task_struct *t;
+
+ rq = cpu_rq(cpu);
+ raw_spin_lock(&rq->lock);
+ t = rq->curr;
+ if (t->membarrier_expedited) {
+ int ret;
+
+ ret = smp_call_function_single(cpu, memory_barrier,
+ NULL, 1);
+ WARN_ON_ONCE(ret);
+ }
+ raw_spin_unlock(&rq->lock);
+ }
+}
/**
* sys_membarrier - issue memory barriers on a set of threads
@@ -57,9 +124,15 @@ SYSCALL_DEFINE2(membarrier, int, cmd, int, flags)
case MEMBARRIER_CMD_QUERY:
return MEMBARRIER_CMD_BITMASK;
case MEMBARRIER_CMD_SHARED:
- if (num_online_cpus() > 1)
+ if (num_online_cpus() > 1) {
synchronize_sched();
+ membarrier_nohz_full_expedited();
+ }
return 0;
+ case MEMBARRIER_CMD_REGISTER_EXPEDITED:
+ return membarrier_register_expedited(current);
+ case MEMBARRIER_CMD_UNREGISTER_EXPEDITED:
+ return membarrier_unregister_expedited(current);
default:
return -EINVAL;
}
--
2.1.4
Powered by blists - more mailing lists