lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <1372172737-18726-1-git-send-email-dvlasenk@redhat.com>
Date:	Tue, 25 Jun 2013 17:05:37 +0200
From:	Denys Vlasenko <dvlasenk@...hat.com>
To:	linux-kernel@...r.kernel.org
Cc:	Denys Vlasenko <dvlasenk@...hat.com>,
	Oleg Nesterov <oleg@...hat.com>
Subject: [PATCH] epoll_[p]wait: fix spurious -EINTR on ptrace attach

Before this change, epoll_wait and epoll_pwait
spuriously return with -EINTR on ptrace attach.

By analogy with poll syscall family, epoll_[p]wait should be interruptible
by signals regardless of SA_RESTART, therefore, this change
makes them return -ERESTARTNOHAND if timeout has expired.

If timeout has not expired, and if interrupted by harmless SIG_DFL'ed
signals such as SIGWINCH and other signal-like activity
such as ptrace attach, epoll_[p]wait should restart with decreased timeout.
To that end, set up restart block and return -ERESTART_RESTARTBLOCK.

In order to define a "sigset_t ksigmask" member, I had to add
<asm/signal.h> to linux/thread_info.h, which threw some
build errors about undefined smp barriers and such.
Removing <linux/seqlock.h> include from linux/time.h
solved it.

Run-tested.

Signed-off-by: Denys Vlasenko <dvlasenk@...hat.com>
CC: Oleg Nesterov <oleg@...hat.com>
---
 fs/eventpoll.c              | 146 ++++++++++++++++++++++++++------------------
 include/linux/thread_info.h |  10 +++
 include/linux/time.h        |   1 -
 3 files changed, 98 insertions(+), 59 deletions(-)

diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 0cff4434..1df35aa 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1911,16 +1911,16 @@ error_return:
 	return error;
 }
 
-/*
- * Implement the event wait interface for the eventpoll file. It is the kernel
- * part of the user space epoll_wait(2).
- */
-SYSCALL_DEFINE4(epoll_wait, int, epfd, struct epoll_event __user *, events,
-		int, maxevents, int, timeout)
+static long do_restart_epoll_pwait(struct restart_block *restart_block);
+
+static long do_epoll_pwait(int epfd, struct epoll_event __user *events,
+		int maxevents, int timeout, const sigset_t *sigmask)
 {
 	int error;
+	sigset_t ksigmask, ksigsaved;
 	struct fd f;
 	struct eventpoll *ep;
+	struct timespec start;
 
 	/* The maximum number of event must be greater than zero */
 	if (maxevents <= 0 || maxevents > EP_MAX_EVENTS)
@@ -1944,19 +1944,97 @@ SYSCALL_DEFINE4(epoll_wait, int, epfd, struct epoll_event __user *, events,
 		goto error_fput;
 
 	/*
+	 * If the caller wants a certain signal mask to be set during the wait,
+	 * we apply it here.
+	 */
+	if (sigmask) {
+		ksigmask = *sigmask;
+		sigdelsetmask(&ksigmask, sigmask(SIGKILL) | sigmask(SIGSTOP));
+		sigprocmask(SIG_SETMASK, &ksigmask, &ksigsaved);
+	}
+
+	/*
 	 * At this point it is safe to assume that the "private_data" contains
 	 * our own data structure.
 	 */
 	ep = f.file->private_data;
 
+	if (timeout > 0)
+		ktime_get_ts(&start);
+
 	/* Time to fish for events ... */
 	error = ep_poll(ep, events, maxevents, timeout);
 
+	if (error == -EINTR) {
+		error = -ERESTARTNOHAND;
+		if (timeout > 0) {
+			struct restart_block *restart_block;
+			struct timespec end;
+			ktime_t rem;
+			s64 rem_msec;
+
+			ktime_get_ts(&end);
+			rem = ktime_sub(timespec_to_ktime(end), timespec_to_ktime(start));
+			rem_msec = ktime_to_ms(rem);
+			if (rem_msec > 0) {
+				restart_block = &current_thread_info()->restart_block;
+				restart_block->fn = do_restart_epoll_pwait;
+				restart_block->epoll.epfd = epfd;
+				restart_block->epoll.events = events;
+				restart_block->epoll.maxevents = maxevents;
+				restart_block->epoll.timeout = rem_msec;
+				restart_block->epoll.has_ksigmask = (sigmask != NULL);
+				restart_block->epoll.ksigmask = ksigmask;
+				error = -ERESTART_RESTARTBLOCK;
+			}
+		}
+		/*
+		 * We've got a signal while waiting, we do not restore the
+		 * signal mask yet, and we allow do_signal() to deliver the signal on
+		 * the way back to userspace, before the signal mask is restored.
+		 */
+		if (sigmask) {
+			memcpy(&current->saved_sigmask, &ksigsaved,
+			       sizeof(ksigsaved));
+			set_restore_sigmask();
+		}
+	}
+	else
+	/*
+	 * If we changed the signal mask, we need to restore the original one.
+	 */
+	if (sigmask) {
+		sigprocmask(SIG_SETMASK, &ksigsaved, NULL);
+	}
+
 error_fput:
 	fdput(f);
 	return error;
 }
 
+static long do_restart_epoll_pwait(struct restart_block *restart_block)
+{
+	int epfd = restart_block->epoll.epfd;
+	struct epoll_event __user *events = restart_block->epoll.events;
+	int maxevents = restart_block->epoll.maxevents;
+	int timeout = restart_block->epoll.timeout;
+	sigset_t *ksigmask = restart_block->epoll.has_ksigmask
+				? &restart_block->epoll.ksigmask
+				: NULL;
+
+	return do_epoll_pwait(epfd, events, maxevents, timeout, ksigmask);
+}
+
+/*
+ * Implement the event wait interface for the eventpoll file. It is the kernel
+ * part of the user space epoll_wait(2).
+ */
+SYSCALL_DEFINE4(epoll_wait, int, epfd, struct epoll_event __user *, events,
+		int, maxevents, int, timeout)
+{
+	return do_epoll_pwait(epfd, events, maxevents, timeout, NULL);
+}
+
 /*
  * Implement the event wait interface for the eventpoll file. It is the kernel
  * part of the user space epoll_pwait(2).
@@ -1965,40 +2043,16 @@ SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events,
 		int, maxevents, int, timeout, const sigset_t __user *, sigmask,
 		size_t, sigsetsize)
 {
-	int error;
-	sigset_t ksigmask, sigsaved;
+	sigset_t ksigmask;
 
-	/*
-	 * If the caller wants a certain signal mask to be set during the wait,
-	 * we apply it here.
-	 */
 	if (sigmask) {
 		if (sigsetsize != sizeof(sigset_t))
 			return -EINVAL;
 		if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask)))
 			return -EFAULT;
-		sigdelsetmask(&ksigmask, sigmask(SIGKILL) | sigmask(SIGSTOP));
-		sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
 	}
 
-	error = sys_epoll_wait(epfd, events, maxevents, timeout);
-
-	/*
-	 * If we changed the signal mask, we need to restore the original one.
-	 * In case we've got a signal while waiting, we do not restore the
-	 * signal mask yet, and we allow do_signal() to deliver the signal on
-	 * the way back to userspace, before the signal mask is restored.
-	 */
-	if (sigmask) {
-		if (error == -EINTR) {
-			memcpy(&current->saved_sigmask, &sigsaved,
-			       sizeof(sigsaved));
-			set_restore_sigmask();
-		} else
-			sigprocmask(SIG_SETMASK, &sigsaved, NULL);
-	}
-
-	return error;
+	return do_epoll_pwait(epfd, events, maxevents, timeout, sigmask ? &ksigmask : NULL);
 }
 
 #ifdef CONFIG_COMPAT
@@ -2008,42 +2062,18 @@ COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd,
 			const compat_sigset_t __user *, sigmask,
 			compat_size_t, sigsetsize)
 {
-	long err;
 	compat_sigset_t csigmask;
-	sigset_t ksigmask, sigsaved;
+	sigset_t ksigmask;
 
-	/*
-	 * If the caller wants a certain signal mask to be set during the wait,
-	 * we apply it here.
-	 */
 	if (sigmask) {
 		if (sigsetsize != sizeof(compat_sigset_t))
 			return -EINVAL;
 		if (copy_from_user(&csigmask, sigmask, sizeof(csigmask)))
 			return -EFAULT;
 		sigset_from_compat(&ksigmask, &csigmask);
-		sigdelsetmask(&ksigmask, sigmask(SIGKILL) | sigmask(SIGSTOP));
-		sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
-	}
-
-	err = sys_epoll_wait(epfd, events, maxevents, timeout);
-
-	/*
-	 * If we changed the signal mask, we need to restore the original one.
-	 * In case we've got a signal while waiting, we do not restore the
-	 * signal mask yet, and we allow do_signal() to deliver the signal on
-	 * the way back to userspace, before the signal mask is restored.
-	 */
-	if (sigmask) {
-		if (err == -EINTR) {
-			memcpy(&current->saved_sigmask, &sigsaved,
-			       sizeof(sigsaved));
-			set_restore_sigmask();
-		} else
-			sigprocmask(SIG_SETMASK, &sigsaved, NULL);
 	}
 
-	return err;
+	return do_epoll_pwait(epfd, events, maxevents, timeout, sigmask ? &ksigmask : NULL);
 }
 #endif
 
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index e7e0473..1a7e0c6 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -8,6 +8,7 @@
 #define _LINUX_THREAD_INFO_H
 
 #include <linux/types.h>
+#include <asm/signal.h>
 #include <linux/bug.h>
 
 struct timespec;
@@ -45,6 +46,15 @@ struct restart_block {
 			unsigned long tv_sec;
 			unsigned long tv_nsec;
 		} poll;
+		/* For epoll_[p]wait */
+		struct {
+			int epfd;
+			int maxevents;
+			int timeout;
+			int has_ksigmask;
+			struct epoll_event __user *events;
+			sigset_t ksigmask;
+		} epoll;
 	};
 };
 
diff --git a/include/linux/time.h b/include/linux/time.h
index d5d229b..f0c1152 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -2,7 +2,6 @@
 #define _LINUX_TIME_H
 
 # include <linux/cache.h>
-# include <linux/seqlock.h>
 # include <linux/math64.h>
 #include <uapi/linux/time.h>
 
-- 
1.8.1.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ