lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <alpine.DEB.2.00.1012152052350.9145@davide-lnx1>
Date:	Wed, 15 Dec 2010 20:55:01 -0800 (PST)
From:	Davide Libenzi <davidel@...ilserver.org>
To:	Timo Sirainen <tss@....fi>
cc:	Linux Kernel Mailing List <linux-kernel@...r.kernel.org>
Subject: Re: poll()ing a shared pipe triggers unnecessary context switches

On Wed, 15 Dec 2010, Timo Sirainen wrote:

> I have one master process reading status updates from a pipe shared by
> multiple child processes. The child processes also poll() the pipe's
> write fd to see when the master process dies. This works fine, except
> each time a child process writes to the pipe, all the child processes
> wake up in poll()/epoll_wait() and go back to sleep without returning
> anything, triggering unnecessary context switches.
> 
> I worked around this by creating yet another pipe just for listening the
> master's death, but this still seems like a Linux bug to me. Solaris and
> BSDs don't behave like this.

Hmm, I *thought* I had already posted this.
The code is in place, in fs/select.c, to filter wakeups based on keys, we 
just need to wire up the event-based wakeups to pipes.
See patch below, which should take care of both poll and epoll (and select).




> Test program:
> 
> /*
>    gcc poller.c -o poller -Wall
> 
>    Usage: ./poller
>    Wait for a while if it prints any "volcs for poll() = .." messages.
>    It shouldn't.
>    Program stops by hitting enter.
> */
> #include <stdio.h>
> #include <stdlib.h>
> #include <string.h>
> #include <unistd.h>
> #include <poll.h>
> #include <sys/time.h>
> #include <sys/resource.h>
> 
> void child(int fd)
> {
> 	struct pollfd pfd;
> 	struct rusage ru1, ru2;
> 	int diff;
> 
> 	memset(&pfd, 0, sizeof(pfd));
> 	pfd.fd = fd;
> 	pfd.events = 0;
> 	while (write(fd, "foo\n", 4) == 4) {
> 		getrusage(RUSAGE_SELF, &ru1);
> 		if (poll(&pfd, 1, 100) != 0)
> 			break;
> 		getrusage(RUSAGE_SELF, &ru2);
> 
> 		diff = ru2.ru_nvcsw - ru1.ru_nvcsw;
> 		if (diff > 2)
> 			printf("volcs for poll() = %d\n", diff);
> 
> 	}
> 	printf("%d exited\n", (int)getpid());
> 	exit(0);
> }
> 
> int main(void)
> {
> 	struct pollfd pfds[2];
> 	int i, fd[2];
> 	char buf[1024];
> 
> 	pipe(fd);
> 	for (i = 0; i < 10; i++) {
> 		if (fork() == 0) {
> 			close(fd[0]);
> 			child(fd[1]);
> 		}
> 	}
> 
> 	memset(pfds, 0, sizeof(pfds));
> 	pfds[0].fd = 0;
> 	pfds[0].events = POLLIN;
> 	pfds[1].fd = fd[0];
> 	pfds[1].events = POLLIN;
> 	while (poll(pfds, 2, -1) > 0) {
> 		if (pfds[0].revents != 0)
> 			break;
> 		if (pfds[1].revents != 0)
> 			read(fd[0], buf, sizeof(buf));
> 	}
> 
> 	close(fd[0]);
> 	close(fd[1]);
> 	printf("done\n");
> 	return 1;
> }



---
 fs/pipe.c |   10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

Index: linux-2.6.mod/fs/pipe.c
===================================================================
--- linux-2.6.mod.orig/fs/pipe.c	2010-12-15 20:49:48.000000000 -0800
+++ linux-2.6.mod/fs/pipe.c	2010-12-15 20:51:42.000000000 -0800
@@ -441,7 +441,7 @@
 			break;
 		}
 		if (do_wakeup) {
-			wake_up_interruptible_sync(&pipe->wait);
+			wake_up_interruptible_sync_poll(&pipe->wait, POLLOUT);
  			kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
 		}
 		pipe_wait(pipe);
@@ -450,7 +450,7 @@
 
 	/* Signal writers asynchronously that there is more room. */
 	if (do_wakeup) {
-		wake_up_interruptible_sync(&pipe->wait);
+		wake_up_interruptible_sync_poll(&pipe->wait, POLLOUT);
 		kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
 	}
 	if (ret > 0)
@@ -612,7 +612,7 @@
 			break;
 		}
 		if (do_wakeup) {
-			wake_up_interruptible_sync(&pipe->wait);
+			wake_up_interruptible_sync_poll(&pipe->wait, POLLIN);
 			kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
 			do_wakeup = 0;
 		}
@@ -623,7 +623,7 @@
 out:
 	mutex_unlock(&inode->i_mutex);
 	if (do_wakeup) {
-		wake_up_interruptible_sync(&pipe->wait);
+		wake_up_interruptible_sync_poll(&pipe->wait, POLLIN);
 		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
 	}
 	if (ret > 0)
@@ -715,7 +715,7 @@
 	if (!pipe->readers && !pipe->writers) {
 		free_pipe_info(inode);
 	} else {
-		wake_up_interruptible_sync(&pipe->wait);
+		wake_up_interruptible_sync_poll(&pipe->wait, POLLIN | POLLOUT);
 		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
 		kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
 	}

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ