lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <1251208265.7538.1157.camel@twins>
Date:	Tue, 25 Aug 2009 15:51:05 +0200
From:	Peter Zijlstra <peterz@...radead.org>
To:	Arnaldo Carvalho de Melo <acme@...stprotocols.net>,
	Ingo Molnar <mingo@...e.hu>,
	Thomas Gleixner <tglx@...utronix.de>
Cc:	linux-kernel <linux-kernel@...r.kernel.org>
Subject: [RFC][PATCH -rt] perf_counters: defer poll() wakeups to softirq

Use timer softirq for wakeups on preempt_rt

Normally pending work is work that cannot be done from NMI context, such
as wakeups and disabling the counter. The pending work is a singly
linked list using atomic ops so that it functions from NMI context.

Normally this is called from IRQ context through use of a self-IPI
(x86) or upon enabling hard interrupts (powerpc). Architectures that do
not implement set_perf_counter_pending() nor call
perf_counter_do_pending() upon leaving NMI context will get a polling
fallback from the timer softirq.

However, in -rt we cannot do the wakeup from IRQ context because it's a
wait_queue wakeup, which can be O(n), so defer all wakeups to the softirq
fallback by creating a second pending list that's only processed from
there.

[ not tested at all... ]

Signed-off-by: Peter Zijlstra <a.p.zijlstra@...llo.nl>
---
 include/linux/perf_counter.h |    5 +++
 kernel/perf_counter.c        |   61 +++++++++++++++++++++++++++++++++--------
 kernel/timer.c               |    2 +-
 4 files changed, 65 insertions(+), 16 deletions(-)

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 972f90d..e61eee1 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -612,6 +612,9 @@ struct perf_counter {
 	int				pending_kill;
 	int				pending_disable;
 	struct perf_pending_entry	pending;
+#ifdef CONFIG_PREEMPT_RT
+	struct perf_pending_entry	pending_softirq;
+#endif
 
 	atomic_t			event_limit;
 
@@ -703,6 +706,7 @@ extern void perf_counter_exit_task(struct task_struct *child);
 extern void perf_counter_free_task(struct task_struct *task);
 extern void set_perf_counter_pending(void);
 extern void perf_counter_do_pending(void);
+extern void perf_counter_do_pending_softirq(void);
 extern void perf_counter_print_debug(void);
 extern void __perf_disable(void);
 extern bool __perf_enable(void);
@@ -787,6 +791,7 @@ static inline int perf_counter_init_task(struct task_struct *child)	{ return 0;
 static inline void perf_counter_exit_task(struct task_struct *child)	{ }
 static inline void perf_counter_free_task(struct task_struct *task)	{ }
 static inline void perf_counter_do_pending(void)			{ }
+static inline void perf_counter_do_pending_softirq(void)		{ }
 static inline void perf_counter_print_debug(void)			{ }
 static inline void perf_disable(void)					{ }
 static inline void perf_enable(void)					{ }
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 53abcbe..d3b065d 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -2397,45 +2397,69 @@ static void perf_pending_counter(struct perf_pending_entry *entry)
 		__perf_counter_disable(counter);
 	}
 
+#ifndef CONFIG_PREEMPT_RT
 	if (counter->pending_wakeup) {
 		counter->pending_wakeup = 0;
 		perf_counter_wakeup(counter);
 	}
+#endif
 }
 
+#ifdef CONFIG_PREEMPT_RT
+static void perf_pending_counter_softirq(struct perf_pending_entry *entry)
+{
+	struct perf_counter *counter = container_of(entry,
+			struct perf_counter, pending_softirq);
+
+	if (counter->pending_wakeup) {
+		counter->pending_wakeup = 0;
+		perf_counter_wakeup(counter);
+	}
+}
+#endif
+
 #define PENDING_TAIL ((struct perf_pending_entry *)-1UL)
 
 static DEFINE_PER_CPU(struct perf_pending_entry *, perf_pending_head) = {
 	PENDING_TAIL,
 };
 
-static void perf_pending_queue(struct perf_pending_entry *entry,
-			       void (*func)(struct perf_pending_entry *))
-{
-	struct perf_pending_entry **head;
+static DEFINE_PER_CPU(struct perf_pending_entry *, perf_pending_softirq_head) = {
+	PENDING_TAIL,
+};
 
+static void __perf_pending_queue(struct perf_pending_entry **head,
+				 struct perf_pending_entry *entry,
+			         void (*func)(struct perf_pending_entry *))
+{
 	if (cmpxchg(&entry->next, NULL, PENDING_TAIL) != NULL)
 		return;
 
 	entry->func = func;
 
-	head = &get_cpu_var(perf_pending_head);
-
 	do {
 		entry->next = *head;
 	} while (cmpxchg(head, entry->next, entry) != entry->next);
+}
 
-	set_perf_counter_pending();
+static void perf_pending_queue(struct perf_pending_entry *entry,
+			       void (*func)(struct perf_pending_entry *))
+{
+	struct perf_pending_entry **head;
+       
+	head = &get_cpu_var(perf_pending_head);
+	__perf_pending_queue(head, entry, func);
+	put_cpu_var(perf_pending_head);
 
-	put_cpu_var(perf_pending_head);
+	set_perf_counter_pending();
 }
 
-static int __perf_pending_run(void)
+static int __perf_pending_run(struct perf_pending_entry **head)
 {
 	struct perf_pending_entry *list;
 	int nr = 0;
 
-	list = xchg(&__get_cpu_var(perf_pending_head), PENDING_TAIL);
+	list = xchg(head, PENDING_TAIL);
 	while (list != PENDING_TAIL) {
 		void (*func)(struct perf_pending_entry *);
 		struct perf_pending_entry *entry = list;
@@ -2465,7 +2489,8 @@ static inline int perf_not_pending(struct perf_counter *counter)
 	 * need to wait.
 	 */
 	get_cpu();
-	__perf_pending_run();
+	__perf_pending_run(&__get_cpu_var(perf_pending_head));
+	__perf_pending_run(&__get_cpu_var(perf_pending_softirq_head));
 	put_cpu();
 
 	/*
@@ -2483,7 +2508,13 @@ static void perf_pending_sync(struct perf_counter *counter)
 
 void perf_counter_do_pending(void)
 {
-	__perf_pending_run();
+	__perf_pending_run(&__get_cpu_var(perf_pending_head));
+}
+
+void perf_counter_do_pending_softirq(void)
+{
+	__perf_pending_run(&__get_cpu_var(perf_pending_head));
+	__perf_pending_run(&__get_cpu_var(perf_pending_softirq_head));
 }
 
 /*
@@ -2543,8 +2574,14 @@ static void perf_output_wakeup(struct perf_output_handle *handle)
 
 	if (handle->nmi) {
 		handle->counter->pending_wakeup = 1;
+#ifndef CONFIG_PREEMPT_RT
 		perf_pending_queue(&handle->counter->pending,
 				   perf_pending_counter);
+#else
+		__perf_pending_queue(&__get_cpu_var(perf_pending_softirq_head),
+				     &handle->counter->pending_softirq,
+				     perf_pending_counter_softirq);
+#endif
 	} else
 		perf_counter_wakeup(handle->counter);
 }
diff --git a/kernel/timer.c b/kernel/timer.c
index 33fc9d1..1dd1456 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1188,7 +1188,7 @@ static void run_timer_softirq(struct softirq_action *h)
 {
 	struct tvec_base *base = __get_cpu_var(tvec_bases);
 
-	perf_counter_do_pending();
+	perf_counter_do_pending_softirq();
 
 	hrtimer_run_pending();
 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ