lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20180925233240.24451-2-subhra.mazumdar@oracle.com>
Date:   Tue, 25 Sep 2018 16:32:40 -0700
From:   subhra mazumdar <subhra.mazumdar@...cle.com>
To:     linux-kernel@...r.kernel.org
Cc:     peterz@...radead.org, tglx@...utronix.de, dhaval.giani@...cle.com,
        steven.sistare@...cle.com
Subject: [RFC PATCH v2 1/1] pipe: busy wait for pipe

Introduce a pipe_ll_usec field for pipes that indicates the number of
microseconds a thread should spin if the pipe is empty or full before
sleeping. This is similar to busy polling on network sockets. Workloads
like hackbench in pipe mode benefit significantly from this by avoiding
the sleep and wakeup overhead, and other similar use cases can benefit as
well. A tunable, pipe_busy_poll, is introduced to enable or disable busy
waiting via /proc. Its value specifies the spin duration in microseconds.
The default value is 0, indicating no spin.

Signed-off-by: subhra mazumdar <subhra.mazumdar@...cle.com>
---
 fs/pipe.c                 | 12 ++++++++++++
 include/linux/pipe_fs_i.h |  2 ++
 kernel/sysctl.c           |  7 +++++++
 3 files changed, 21 insertions(+)

diff --git a/fs/pipe.c b/fs/pipe.c
index bdc5d3c..35d805b 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -26,6 +26,7 @@
 
 #include <linux/uaccess.h>
 #include <asm/ioctls.h>
+#include <linux/sched/clock.h>
 
 #include "internal.h"
 
@@ -40,6 +41,7 @@ unsigned int pipe_max_size = 1048576;
  */
 unsigned long pipe_user_pages_hard;
 unsigned long pipe_user_pages_soft = PIPE_DEF_BUFFERS * INR_OPEN_CUR;
+unsigned int pipe_busy_poll;
 
 /*
  * We use a start+len construction, which provides full use of the 
@@ -106,6 +108,7 @@ void pipe_double_lock(struct pipe_inode_info *pipe1,
 void pipe_wait(struct pipe_inode_info *pipe)
 {
 	DEFINE_WAIT(wait);
+	u64 start;
 
 	/*
 	 * Pipes are system-local resources, so sleeping on them
@@ -113,6 +116,10 @@ void pipe_wait(struct pipe_inode_info *pipe)
 	 */
 	prepare_to_wait(&pipe->wait, &wait, TASK_INTERRUPTIBLE);
 	pipe_unlock(pipe);
+	start = local_clock();
+	while (current->state != TASK_RUNNING &&
+	       ((local_clock() - start) >> 10) < pipe->pipe_ll_usec)
+		cpu_relax();
 	schedule();
 	finish_wait(&pipe->wait, &wait);
 	pipe_lock(pipe);
@@ -825,6 +832,7 @@ static int do_pipe2(int __user *fildes, int flags)
 	struct file *files[2];
 	int fd[2];
 	int error;
+	struct pipe_inode_info *pipe;
 
 	error = __do_pipe_flags(fd, files, flags);
 	if (!error) {
@@ -838,6 +846,10 @@ static int do_pipe2(int __user *fildes, int flags)
 			fd_install(fd[0], files[0]);
 			fd_install(fd[1], files[1]);
 		}
+		pipe = files[0]->private_data;
+		pipe->pipe_ll_usec = pipe_busy_poll;
+		pipe = files[1]->private_data;
+		pipe->pipe_ll_usec = pipe_busy_poll;
 	}
 	return error;
 }
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index 5a3bb3b..73267d2 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -55,6 +55,7 @@ struct pipe_inode_info {
 	unsigned int waiting_writers;
 	unsigned int r_counter;
 	unsigned int w_counter;
+	unsigned int pipe_ll_usec;
 	struct page *tmp_page;
 	struct fasync_struct *fasync_readers;
 	struct fasync_struct *fasync_writers;
@@ -170,6 +171,7 @@ void pipe_double_lock(struct pipe_inode_info *, struct pipe_inode_info *);
 extern unsigned int pipe_max_size;
 extern unsigned long pipe_user_pages_hard;
 extern unsigned long pipe_user_pages_soft;
+extern unsigned int pipe_busy_poll;
 
 /* Drop the inode semaphore and wait for a pipe event, atomically */
 void pipe_wait(struct pipe_inode_info *pipe);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index cc02050..0e9ce0c 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1863,6 +1863,13 @@ static struct ctl_table fs_table[] = {
 		.proc_handler	= proc_doulongvec_minmax,
 	},
 	{
+		.procname       = "pipe-busy-poll",
+		.data           = &pipe_busy_poll,
+		.maxlen         = sizeof(unsigned int),
+		.mode           = 0644,
+		.proc_handler   = proc_dointvec_minmax,
+	},
+	{
 		.procname	= "mount-max",
 		.data		= &sysctl_mount_max,
 		.maxlen		= sizeof(unsigned int),
-- 
2.9.3

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ