lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20120501162811.26261d1d@notabene.brown>
Date:	Tue, 1 May 2012 16:28:11 +1000
From:	NeilBrown <neilb@...e.de>
To:	Arve Hjønnevåg <arve@...roid.com>
Cc:	"Rafael J. Wysocki" <rjw@...k.pl>,
	Linux PM list <linux-pm@...r.kernel.org>,
	LKML <linux-kernel@...r.kernel.org>,
	Magnus Damm <magnus.damm@...il.com>, markgross@...gnar.org,
	Matthew Garrett <mjg@...hat.com>,
	Greg KH <gregkh@...uxfoundation.org>,
	John Stultz <john.stultz@...aro.org>,
	Brian Swetland <swetland@...gle.com>,
	Alan Stern <stern@...land.harvard.edu>,
	Dmitry Torokhov <dmitry.torokhov@...il.com>,
	"Srivatsa S. Bhat" <srivatsa.bhat@...ux.vnet.ibm.com>
Subject: Re: [PATCH] epoll: Add a flag, EPOLLWAKEUP, to prevent suspend
 while epoll events are ready

On Mon, 30 Apr 2012 22:33:48 -0700 Arve Hjønnevåg <arve@...roid.com> wrote:

> When an epoll_event, that has the EPOLLWAKEUP flag set, is ready, a
> wakeup_source will be active to prevent suspend. This can be used to
> handle wakeup events from a driver that support poll, e.g. input, if
> that driver wakes up the waitqueue passed to epoll before allowing
> suspend.
> 
> Signed-off-by: Arve Hjønnevåg <arve@...roid.com>
> Signed-off-by: Rafael J. Wysocki <rjw@...k.pl>

Thanks.
 Reviewed-by: NeilBrown <neilb@...e.de>

However:
1/ I think all references to "automatic system suspend" can be replaced with
   "system suspend" as an active wakeup_source disables any suspend, no matter
   its source.
2/ I reserve the right to submit for discussion a later patch which removes
   the ep->ws in favour of some other exclusion mechanism :-)

NeilBrown



> ---
>  fs/eventpoll.c             |   90 ++++++++++++++++++++++++++++++++++++++++++-
>  include/linux/capability.h |    5 ++-
>  include/linux/eventpoll.h  |   12 ++++++
>  3 files changed, 103 insertions(+), 4 deletions(-)
> 
> diff --git a/fs/eventpoll.c b/fs/eventpoll.c
> index 739b098..1abed50 100644
> --- a/fs/eventpoll.c
> +++ b/fs/eventpoll.c
> @@ -33,6 +33,7 @@
>  #include <linux/bitops.h>
>  #include <linux/mutex.h>
>  #include <linux/anon_inodes.h>
> +#include <linux/device.h>
>  #include <asm/uaccess.h>
>  #include <asm/io.h>
>  #include <asm/mman.h>
> @@ -87,7 +88,7 @@
>   */
>  
>  /* Epoll private bits inside the event mask */
> -#define EP_PRIVATE_BITS (EPOLLONESHOT | EPOLLET)
> +#define EP_PRIVATE_BITS (EPOLLWAKEUP | EPOLLONESHOT | EPOLLET)
>  
>  /* Maximum number of nesting allowed inside epoll sets */
>  #define EP_MAX_NESTS 4
> @@ -154,6 +155,9 @@ struct epitem {
>  	/* List header used to link this item to the "struct file" items list */
>  	struct list_head fllink;
>  
> +	/* wakeup_source used when EPOLLWAKEUP is set */
> +	struct wakeup_source *ws;
> +
>  	/* The structure that describe the interested events and the source fd */
>  	struct epoll_event event;
>  };
> @@ -194,6 +198,9 @@ struct eventpoll {
>  	 */
>  	struct epitem *ovflist;
>  
> +	/* wakeup_source used when ep_scan_ready_list is running */
> +	struct wakeup_source *ws;
> +
>  	/* The user that created the eventpoll descriptor */
>  	struct user_struct *user;
>  
> @@ -588,8 +595,10 @@ static int ep_scan_ready_list(struct eventpoll *ep,
>  		 * queued into ->ovflist but the "txlist" might already
>  		 * contain them, and the list_splice() below takes care of them.
>  		 */
> -		if (!ep_is_linked(&epi->rdllink))
> +		if (!ep_is_linked(&epi->rdllink)) {
>  			list_add_tail(&epi->rdllink, &ep->rdllist);
> +			__pm_stay_awake(epi->ws);
> +		}
>  	}
>  	/*
>  	 * We need to set back ep->ovflist to EP_UNACTIVE_PTR, so that after
> @@ -602,6 +611,7 @@ static int ep_scan_ready_list(struct eventpoll *ep,
>  	 * Quickly re-inject items left on "txlist".
>  	 */
>  	list_splice(&txlist, &ep->rdllist);
> +	__pm_relax(ep->ws);
>  
>  	if (!list_empty(&ep->rdllist)) {
>  		/*
> @@ -656,6 +666,8 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi)
>  		list_del_init(&epi->rdllink);
>  	spin_unlock_irqrestore(&ep->lock, flags);
>  
> +	wakeup_source_unregister(epi->ws);
> +
>  	/* At this point it is safe to free the eventpoll item */
>  	kmem_cache_free(epi_cache, epi);
>  
> @@ -706,6 +718,7 @@ static void ep_free(struct eventpoll *ep)
>  	mutex_unlock(&epmutex);
>  	mutex_destroy(&ep->mtx);
>  	free_uid(ep->user);
> +	wakeup_source_unregister(ep->ws);
>  	kfree(ep);
>  }
>  
> @@ -737,6 +750,7 @@ static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head,
>  			 * callback, but it's not actually ready, as far as
>  			 * caller requested events goes. We can remove it here.
>  			 */
> +			__pm_relax(epi->ws);
>  			list_del_init(&epi->rdllink);
>  		}
>  	}
> @@ -927,13 +941,23 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
>  		if (epi->next == EP_UNACTIVE_PTR) {
>  			epi->next = ep->ovflist;
>  			ep->ovflist = epi;
> +			if (epi->ws) {
> +				/*
> +				 * Activate ep->ws since epi->ws may get
> +				 * deactivated at any time.
> +				 */
> +				__pm_stay_awake(ep->ws);
> +			}
> +
>  		}
>  		goto out_unlock;
>  	}
>  
>  	/* If this file is already in the ready list we exit soon */
> -	if (!ep_is_linked(&epi->rdllink))
> +	if (!ep_is_linked(&epi->rdllink)) {
>  		list_add_tail(&epi->rdllink, &ep->rdllist);
> +		__pm_stay_awake(epi->ws);
> +	}
>  
>  	/*
>  	 * Wake up ( if active ) both the eventpoll wait list and the ->poll()
> @@ -1091,6 +1115,30 @@ static int reverse_path_check(void)
>  	return error;
>  }
>  
> +static int ep_create_wakeup_source(struct epitem *epi)
> +{
> +	const char *name;
> +
> +	if (!epi->ep->ws) {
> +		epi->ep->ws = wakeup_source_register("eventpoll");
> +		if (!epi->ep->ws)
> +			return -ENOMEM;
> +	}
> +
> +	name = epi->ffd.file->f_path.dentry->d_name.name;
> +	epi->ws = wakeup_source_register(name);
> +	if (!epi->ws)
> +		return -ENOMEM;
> +
> +	return 0;
> +}
> +
> +static void ep_destroy_wakeup_source(struct epitem *epi)
> +{
> +	wakeup_source_unregister(epi->ws);
> +	epi->ws = NULL;
> +}
> +
>  /*
>   * Must be called with "mtx" held.
>   */
> @@ -1118,6 +1166,13 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
>  	epi->event = *event;
>  	epi->nwait = 0;
>  	epi->next = EP_UNACTIVE_PTR;
> +	if (epi->event.events & EPOLLWAKEUP) {
> +		error = ep_create_wakeup_source(epi);
> +		if (error)
> +			goto error_create_wakeup_source;
> +	} else {
> +		epi->ws = NULL;
> +	}
>  
>  	/* Initialize the poll table using the queue callback */
>  	epq.epi = epi;
> @@ -1164,6 +1219,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
>  	/* If the file is already "ready" we drop it inside the ready list */
>  	if ((revents & event->events) && !ep_is_linked(&epi->rdllink)) {
>  		list_add_tail(&epi->rdllink, &ep->rdllist);
> +		__pm_stay_awake(epi->ws);
>  
>  		/* Notify waiting tasks that events are available */
>  		if (waitqueue_active(&ep->wq))
> @@ -1204,6 +1260,9 @@ error_unregister:
>  		list_del_init(&epi->rdllink);
>  	spin_unlock_irqrestore(&ep->lock, flags);
>  
> +	wakeup_source_unregister(epi->ws);
> +
> +error_create_wakeup_source:
>  	kmem_cache_free(epi_cache, epi);
>  
>  	return error;
> @@ -1229,6 +1288,12 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even
>  	epi->event.events = event->events;
>  	pt._key = event->events;
>  	epi->event.data = event->data; /* protected by mtx */
> +	if (epi->event.events & EPOLLWAKEUP) {
> +		if (!epi->ws)
> +			ep_create_wakeup_source(epi);
> +	} else if (epi->ws) {
> +		ep_destroy_wakeup_source(epi);
> +	}
>  
>  	/*
>  	 * Get current event bits. We can safely use the file* here because
> @@ -1244,6 +1309,7 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even
>  		spin_lock_irq(&ep->lock);
>  		if (!ep_is_linked(&epi->rdllink)) {
>  			list_add_tail(&epi->rdllink, &ep->rdllist);
> +			__pm_stay_awake(epi->ws);
>  
>  			/* Notify waiting tasks that events are available */
>  			if (waitqueue_active(&ep->wq))
> @@ -1282,6 +1348,18 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head,
>  	     !list_empty(head) && eventcnt < esed->maxevents;) {
>  		epi = list_first_entry(head, struct epitem, rdllink);
>  
> +		/*
> +		 * Activate ep->ws before deactivating epi->ws to prevent
> +		 * triggering auto-suspend here (in case we reactive epi->ws
> +		 * below).
> +		 *
> +		 * This could be rearranged to delay the deactivation of epi->ws
> +		 * instead, but then epi->ws would temporarily be out of sync
> +		 * with ep_is_linked().
> +		 */
> +		if (epi->ws && epi->ws->active)
> +			__pm_stay_awake(ep->ws);
> +		__pm_relax(epi->ws);
>  		list_del_init(&epi->rdllink);
>  
>  		pt._key = epi->event.events;
> @@ -1298,6 +1376,7 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head,
>  			if (__put_user(revents, &uevent->events) ||
>  			    __put_user(epi->event.data, &uevent->data)) {
>  				list_add(&epi->rdllink, head);
> +				__pm_stay_awake(epi->ws);
>  				return eventcnt ? eventcnt : -EFAULT;
>  			}
>  			eventcnt++;
> @@ -1317,6 +1396,7 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head,
>  				 * poll callback will queue them in ep->ovflist.
>  				 */
>  				list_add_tail(&epi->rdllink, &ep->rdllist);
> +				__pm_stay_awake(epi->ws);
>  			}
>  		}
>  	}
> @@ -1629,6 +1709,10 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
>  	if (!tfile->f_op || !tfile->f_op->poll)
>  		goto error_tgt_fput;
>  
> +	/* Check if EPOLLWAKEUP is allowed */
> +	if ((epds.events & EPOLLWAKEUP) && !capable(CAP_EPOLLWAKEUP))
> +		goto error_tgt_fput;
> +
>  	/*
>  	 * We have to check that the file structure underneath the file descriptor
>  	 * the user passed to us _is_ an eventpoll file. And also we do not permit
> diff --git a/include/linux/capability.h b/include/linux/capability.h
> index 12d52de..222974a 100644
> --- a/include/linux/capability.h
> +++ b/include/linux/capability.h
> @@ -360,8 +360,11 @@ struct cpu_vfs_cap_data {
>  
>  #define CAP_WAKE_ALARM            35
>  
> +/* Allow preventing automatic system suspends while epoll events are pending */
>  
> -#define CAP_LAST_CAP         CAP_WAKE_ALARM
> +#define CAP_EPOLLWAKEUP      36
> +
> +#define CAP_LAST_CAP         CAP_EPOLLWAKEUP
>  
>  #define cap_valid(x) ((x) >= 0 && (x) <= CAP_LAST_CAP)
>  
> diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h
> index 657ab55..5b591fb 100644
> --- a/include/linux/eventpoll.h
> +++ b/include/linux/eventpoll.h
> @@ -26,6 +26,18 @@
>  #define EPOLL_CTL_DEL 2
>  #define EPOLL_CTL_MOD 3
>  
> +/*
> + * Request the handling of system wakeup events so as to prevent automatic
> + * system suspends from happening while those events are being processed.
> + *
> + * Assuming neither EPOLLET nor EPOLLONESHOT is set, automatic system suspends
> + * will not be re-allowed until epoll_wait is called again after consuming the
> + * wakeup event(s).
> + *
> + * Requires CAP_EPOLLWAKEUP
> + */
> +#define EPOLLWAKEUP (1 << 29)
> +
>  /* Set the One Shot behaviour for the target file descriptor */
>  #define EPOLLONESHOT (1 << 30)
>  


Download attachment "signature.asc" of type "application/pgp-signature" (829 bytes)

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ