lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20180830202458.32579-3-subhra.mazumdar@oracle.com>
Date:   Thu, 30 Aug 2018 13:24:58 -0700
From:   subhra mazumdar <subhra.mazumdar@...cle.com>
To:     linux-kernel@...r.kernel.org
Cc:     peterz@...radead.org, dhaval.giani@...cle.com,
        steven.sistare@...cle.com, subhra.mazumdar@...cle.com
Subject: [RFC PATCH 2/2] pipe: use pipe busy wait

Enable busy waiting for pipes. pipe_busy_wait() is called when the pipe is
empty (for a reader) or full (for a writer); it spins for the configured
number of microseconds and falls back to pipe_wait() if nothing happens in
that time. wake_up_busy_poll() is called when data is written or read, to
signal any busy-waiting threads. A new tunable, pipe_busy_poll, is exposed
via /proc as the "pipe-busy-poll" sysctl: its value is the spin time in
microseconds, and 0 disables busy waiting.

Signed-off-by: subhra mazumdar <subhra.mazumdar@...cle.com>
---
 fs/pipe.c                 | 58 +++++++++++++++++++++++++++++++++++++++++++++--
 include/linux/pipe_fs_i.h |  1 +
 kernel/sysctl.c           |  7 ++++++
 3 files changed, 64 insertions(+), 2 deletions(-)

diff --git a/fs/pipe.c b/fs/pipe.c
index 97e5be8..03ce76a 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -44,6 +44,7 @@ unsigned int pipe_min_size = PAGE_SIZE;
  */
 unsigned long pipe_user_pages_hard;
 unsigned long pipe_user_pages_soft = PIPE_DEF_BUFFERS * INR_OPEN_CUR;
+unsigned int pipe_busy_poll;	/* sysctl "pipe-busy-poll": pipe busy-wait spin time in usecs, copied to each pipe at pipe2(); 0 = no busy wait */
 
 /*
  * We use a start+len construction, which provides full use of the 
@@ -122,6 +123,35 @@ void pipe_wait(struct pipe_inode_info *pipe)
 	pipe_lock(pipe);
 }
 
+/*
+ * Spin until wake_up_busy_poll() bumps pipe->pipe_wait_flag or
+ * pipe_busy_loop_timeout() reports expiry, then fall back to pipe_wait() if
+ * no event was seen. Entered and left with the pipe lock held; the lock is
+ * dropped (and preemption disabled) only for the duration of the spin.
+ */
+void pipe_busy_wait(struct pipe_inode_info *pipe)
+{
+	unsigned long wait_flag = pipe->pipe_wait_flag;
+	unsigned long start_time = pipe_busy_loop_current_time();
+
+	pipe_unlock(pipe);
+	preempt_disable();
+	/* Lockless peek: READ_ONCE() forces a reload, != tolerates wrap-around. */
+	while (READ_ONCE(pipe->pipe_wait_flag) == wait_flag &&
+	       !pipe_busy_loop_timeout(pipe, start_time))
+		cpu_relax();
+	preempt_enable();
+	pipe_lock(pipe);
+	/* Re-check under the lock; sleep only if no event arrived meanwhile. */
+	if (pipe->pipe_wait_flag == wait_flag)
+		pipe_wait(pipe);
+}
+
+void wake_up_busy_poll(struct pipe_inode_info *pipe)
+{
+	pipe->pipe_wait_flag++;	/* advance the event count that pipe_busy_wait() spins on */
+}
+
 static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
 				  struct pipe_buffer *buf)
 {
@@ -254,6 +284,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
 	struct pipe_inode_info *pipe = filp->private_data;
 	int do_wakeup;
 	ssize_t ret;
+	unsigned int poll = pipe->pipe_ll_usec;
 
 	/* Null read succeeds. */
 	if (unlikely(total_len == 0))
@@ -331,11 +362,18 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
 			break;
 		}
 		if (do_wakeup) {
+			if (poll)
+				wake_up_busy_poll(pipe);
 			wake_up_interruptible_sync_poll(&pipe->wait, POLLOUT | POLLWRNORM);
  			kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
 		}
-		pipe_wait(pipe);
+		if (poll)
+			pipe_busy_wait(pipe);
+		else
+			pipe_wait(pipe);
 	}
+	if (poll && do_wakeup)
+		wake_up_busy_poll(pipe);
 	__pipe_unlock(pipe);
 
 	/* Signal writers asynchronously that there is more room. */
@@ -362,6 +400,7 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
 	int do_wakeup = 0;
 	size_t total_len = iov_iter_count(from);
 	ssize_t chars;
+	unsigned int poll = pipe->pipe_ll_usec;
 
 	/* Null write succeeds. */
 	if (unlikely(total_len == 0))
@@ -467,15 +506,22 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
 			break;
 		}
 		if (do_wakeup) {
+			if (poll)
+				wake_up_busy_poll(pipe);
 			wake_up_interruptible_sync_poll(&pipe->wait, POLLIN | POLLRDNORM);
 			kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
 			do_wakeup = 0;
 		}
 		pipe->waiting_writers++;
-		pipe_wait(pipe);
+		if (poll)
+			pipe_busy_wait(pipe);
+		else
+			pipe_wait(pipe);
 		pipe->waiting_writers--;
 	}
 out:
+	if (poll && do_wakeup)
+		wake_up_busy_poll(pipe);
 	__pipe_unlock(pipe);
 	if (do_wakeup) {
 		wake_up_interruptible_sync_poll(&pipe->wait, POLLIN | POLLRDNORM);
@@ -564,6 +610,7 @@ static int
 pipe_release(struct inode *inode, struct file *file)
 {
 	struct pipe_inode_info *pipe = file->private_data;
+	unsigned int poll = pipe->pipe_ll_usec;
 
 	__pipe_lock(pipe);
 	if (file->f_mode & FMODE_READ)
@@ -572,6 +619,8 @@ pipe_release(struct inode *inode, struct file *file)
 		pipe->writers--;
 
 	if (pipe->readers || pipe->writers) {
+		if (poll)
+			wake_up_busy_poll(pipe);
 		wake_up_interruptible_sync_poll(&pipe->wait, POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM | POLLERR | POLLHUP);
 		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
 		kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
@@ -840,6 +889,7 @@ SYSCALL_DEFINE2(pipe2, int __user *, fildes, int, flags)
 	struct file *files[2];
 	int fd[2];
 	int error;
+	struct pipe_inode_info *pipe;
 
 	error = __do_pipe_flags(fd, files, flags);
 	if (!error) {
@@ -853,6 +903,10 @@ SYSCALL_DEFINE2(pipe2, int __user *, fildes, int, flags)
 			fd_install(fd[0], files[0]);
 			fd_install(fd[1], files[1]);
 		}
+		pipe = files[0]->private_data;
+		pipe->pipe_ll_usec = pipe_busy_poll;
+		pipe = files[1]->private_data;
+		pipe->pipe_ll_usec = pipe_busy_poll;
 	}
 	return error;
 }
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index fdfd2a2..3b96b05 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -188,6 +188,7 @@ void pipe_double_lock(struct pipe_inode_info *, struct pipe_inode_info *);
 extern unsigned int pipe_max_size, pipe_min_size;
 extern unsigned long pipe_user_pages_hard;
 extern unsigned long pipe_user_pages_soft;
+extern unsigned int pipe_busy_poll;
 int pipe_proc_fn(struct ctl_table *, int, void __user *, size_t *, loff_t *);
 
 /* Drop the inode semaphore and wait for a pipe event, atomically */
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index d9c31bc..823bde1 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1842,6 +1842,13 @@ static struct ctl_table fs_table[] = {
 		.proc_handler	= proc_doulongvec_minmax,
 	},
 	{
+		.procname	= "pipe-busy-poll",
+		.data		= &pipe_busy_poll,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+	},
+	{
 		.procname	= "mount-max",
 		.data		= &sysctl_mount_max,
 		.maxlen		= sizeof(unsigned int),
-- 
2.9.3

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ