lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20081125194700.26EB.KOSAKI.MOTOHIRO@jp.fujitsu.com>
Date:	Tue, 25 Nov 2008 19:50:31 +0900 (JST)
From:	KOSAKI Motohiro <kosaki.motohiro@...fujitsu.com>
To:	Mathieu Desnoyers <mathieu.desnoyers@...ymtl.ca>,
	Ingo Molnar <mingo@...e.hu>
Cc:	kosaki.motohiro@...fujitsu.com, ltt-dev@...ts.casi.polymtl.ca,
	linux-kernel@...r.kernel.org,
	William Lee Irwin III <wli@...omorphy.com>
Subject: [PATCH] Poll : introduce poll_wait_exclusive() new function


patch against: tip/tracing/marker

==========
Currently, the behavior of wake_up() depends on which function was used
to add the waiter to the wait queue.


                              wake_up()          wake_up_all()
---------------------------------------------------------------
add_wait_queue()              wake up all        wake up all
add_wait_queue_exclusive()    wake up one task   wake up all


Unfortunately, poll_wait() always uses add_wait_queue().
This means there is no way to wake up only one of the polling processes:
wake_up() wakes up all sleeping processes, not just one.


Mathieu Desnoyers explained that this causes the following problem for LTTng.

   In LTTng, all lttd readers are polling all the available debugfs files
   for data. This is principally because the number of reader threads is
   user-defined and there are typical workloads where a single CPU is
   producing most of the tracing data and all other CPUs are idle,
   available to consume data. It therefore makes sense not to tie those
   threads to specific buffers. However, when the number of threads grows,
   we face a "thundering herd" problem where many threads can be woken up
   and put back to sleep, leaving only a single thread doing useful work.


This patch introduces a new API, poll_wait_exclusive(), which allows
waking up only one process.

<usage example>
/*
 * Example poll handler for a hypothetical "foo" device, showing how a
 * driver would call poll_wait_exclusive() in place of poll_wait().
 *
 * Returns POLLIN | POLLRDNORM when data_exist is non-zero, 0 otherwise.
 */
unsigned int foo_device_poll(struct file *file,
		         struct poll_table_struct *wait)
{
	/* Queue on foo_wait_queue; qproc is invoked with exclusive = 1. */
	poll_wait_exclusive(file, &foo_wait_queue, wait);
	/* Report the device as readable only when data is pending. */
	if (data_exist)
		return POLLIN | POLLRDNORM;
	return 0;
}
</usage example>


Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@...fujitsu.com>
CC: Mathieu Desnoyers <mathieu.desnoyers@...ymtl.ca>
CC: Ingo Molnar <mingo@...e.hu>
---
 fs/eventpoll.c       |    7 +++++--
 fs/select.c          |    9 ++++++---
 include/linux/poll.h |   13 +++++++++++--
 3 files changed, 22 insertions(+), 7 deletions(-)



Index: b/fs/eventpoll.c
===================================================================
--- a/fs/eventpoll.c	2008-11-25 19:05:28.000000000 +0900
+++ b/fs/eventpoll.c	2008-11-25 19:15:50.000000000 +0900
@@ -655,7 +655,7 @@ out_unlock:
  * target file wakeup lists.
  */
 static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead,
-				 poll_table *pt)
+				 poll_table *pt, int exclusive)
 {
 	struct epitem *epi = ep_item_from_epqueue(pt);
 	struct eppoll_entry *pwq;
@@ -664,7 +664,10 @@ static void ep_ptable_queue_proc(struct 
 		init_waitqueue_func_entry(&pwq->wait, ep_poll_callback);
 		pwq->whead = whead;
 		pwq->base = epi;
-		add_wait_queue(whead, &pwq->wait);
+		if (exclusive)
+			add_wait_queue_exclusive(whead, &pwq->wait);
+		else
+			add_wait_queue(whead, &pwq->wait);
 		list_add_tail(&pwq->llink, &epi->pwqlist);
 		epi->nwait++;
 	} else {
Index: b/fs/select.c
===================================================================
--- a/fs/select.c	2008-11-25 19:04:26.000000000 +0900
+++ b/fs/select.c	2008-11-25 19:15:50.000000000 +0900
@@ -104,7 +104,7 @@ struct poll_table_page {
  * poll table.
  */
 static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
-		       poll_table *p);
+		       poll_table *p, int exclusive);
 
 void poll_initwait(struct poll_wqueues *pwq)
 {
@@ -173,7 +173,7 @@ static struct poll_table_entry *poll_get
 
 /* Add a new entry */
 static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
-				poll_table *p)
+		       poll_table *p, int exclusive)
 {
 	struct poll_table_entry *entry = poll_get_entry(p);
 	if (!entry)
@@ -182,7 +182,10 @@ static void __pollwait(struct file *filp
 	entry->filp = filp;
 	entry->wait_address = wait_address;
 	init_waitqueue_entry(&entry->wait, current);
-	add_wait_queue(wait_address, &entry->wait);
+	if (exclusive)
+		add_wait_queue_exclusive(wait_address, &entry->wait);
+	else
+		add_wait_queue(wait_address, &entry->wait);
 }
 
 /**
Index: b/include/linux/poll.h
===================================================================
--- a/include/linux/poll.h	2008-11-25 19:04:26.000000000 +0900
+++ b/include/linux/poll.h	2008-11-25 19:19:54.000000000 +0900
@@ -28,7 +28,8 @@ struct poll_table_struct;
 /* 
  * structures and helpers for f_op->poll implementations
  */
-typedef void (*poll_queue_proc)(struct file *, wait_queue_head_t *, struct poll_table_struct *);
+typedef void (*poll_queue_proc)(struct file *, wait_queue_head_t *,
+				struct poll_table_struct *, int);
 
 typedef struct poll_table_struct {
 	poll_queue_proc qproc;
@@ -37,7 +38,15 @@ typedef struct poll_table_struct {
 static inline void poll_wait(struct file * filp, wait_queue_head_t * wait_address, poll_table *p)
 {
 	if (p && wait_address)
-		p->qproc(filp, wait_address, p);
+		p->qproc(filp, wait_address, p, 0);
+}
+
+static inline void poll_wait_exclusive(struct file *filp,
+				       wait_queue_head_t *wait_address,
+				       poll_table *p)
+{
+	if (p && wait_address)
+		p->qproc(filp, wait_address, p, 1);
 }
 
 static inline void init_poll_funcptr(poll_table *pt, poll_queue_proc qproc)


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ