[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20130310011136.GA4815@dcvr.yhbt.net>
Date: Sun, 10 Mar 2013 01:11:36 +0000
From: Eric Wong <normalperson@...t.net>
To: Arve Hjønnevåg <arve@...roid.com>
Cc: NeilBrown <neilb@...e.de>, "Rafael J. Wysocki" <rjw@...k.pl>,
linux-kernel@...r.kernel.org,
Davide Libenzi <davidel@...ilserver.org>,
Alexander Viro <viro@...iv.linux.org.uk>,
linux-fsdevel@...r.kernel.org,
Andrew Morton <akpm@...ux-foundation.org>
Subject: Re: epoll: possible bug from wakeup_source activation
Eric Wong <normalperson@...t.net> wrote:
> Arve Hjønnevåg <arve@...roid.com> wrote:
> > On Fri, Mar 8, 2013 at 12:49 PM, Eric Wong <normalperson@...t.net> wrote:
> > > What happens if ep_modify calls ep_destroy_wakeup_source
> > > while __pm_stay_awake is running on the same epi->ws?
> >
> > Yes, that looks like a problem. I think calling
> > ep_destroy_wakeup_source with ep->lock held should fix that. It is not
> > clear how useful changing EPOLLWAKEUP in ep_modify is, so
> > alternatively we could remove that feature and instead only allow it
> > to be set in ep_insert.
>
> ep->lock would work, but ep->lock is already a source of heavy
> contention in my multithreaded+epoll webservers.
>
> Perhaps RCU can be used? I've no experience with RCU, but I've been
> meaning to get acquainted with RCU.
The following is lightly tested; at least I haven't gotten lockdep
to complain.
--------------------------------- 8< ----------------------------
From 2bcd549893aa204bd858cc1500a70f20b28e47c1 Mon Sep 17 00:00:00 2001
From: Eric Wong <normalperson@...t.net>
Date: Sun, 10 Mar 2013 01:06:50 +0000
Subject: [PATCH] epoll: RCU protect wakeup_source in epitem
This prevents wakeup_source destruction when a user hits the
item with EPOLL_CTL_MOD while ep_poll_callback is running.
Signed-off-by: Eric Wong <normalperson@...t.net>
---
fs/eventpoll.c | 31 ++++++++++++++++++++++++-------
1 file changed, 24 insertions(+), 7 deletions(-)
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 9fec183..e008d54 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -158,7 +158,7 @@ struct epitem {
struct list_head fllink;
/* wakeup_source used when EPOLLWAKEUP is set */
- struct wakeup_source *ws;
+ struct wakeup_source __rcu *ws;
/* The structure that describe the interested events and the source fd */
struct epoll_event event;
@@ -536,6 +536,17 @@ static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi)
}
}
+static inline void ep_pm_stay_awake(struct epitem *epi)
+{
+ struct wakeup_source *ws;
+
+ rcu_read_lock();
+ ws = rcu_dereference(epi->ws);
+ if (ws)
+ __pm_stay_awake(ws);
+ rcu_read_unlock();
+}
+
/**
* ep_scan_ready_list - Scans the ready list in a way that makes possible for
* the scan code, to call f_op->poll(). Also allows for
@@ -984,7 +995,7 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
/* If this file is already in the ready list we exit soon */
if (!ep_is_linked(&epi->rdllink)) {
list_add_tail(&epi->rdllink, &ep->rdllist);
- __pm_stay_awake(epi->ws);
+ ep_pm_stay_awake(epi);
}
/*
@@ -1146,6 +1157,7 @@ static int reverse_path_check(void)
static int ep_create_wakeup_source(struct epitem *epi)
{
const char *name;
+ struct wakeup_source *ws;
if (!epi->ep->ws) {
epi->ep->ws = wakeup_source_register("eventpoll");
@@ -1154,17 +1166,22 @@ static int ep_create_wakeup_source(struct epitem *epi)
}
name = epi->ffd.file->f_path.dentry->d_name.name;
- epi->ws = wakeup_source_register(name);
- if (!epi->ws)
+ ws = wakeup_source_register(name);
+
+ if (!ws)
return -ENOMEM;
+ rcu_assign_pointer(epi->ws, ws);
return 0;
}
static void ep_destroy_wakeup_source(struct epitem *epi)
{
- wakeup_source_unregister(epi->ws);
- epi->ws = NULL;
+ struct wakeup_source *ws = epi->ws;
+
+ rcu_assign_pointer(epi->ws, NULL);
+ synchronize_rcu(); /* wait for ep_pm_stay_awake to finish */
+ wakeup_source_unregister(ws);
}
/*
@@ -1199,7 +1216,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
if (error)
goto error_create_wakeup_source;
} else {
- epi->ws = NULL;
+ RCU_INIT_POINTER(epi->ws, NULL);
}
/* Initialize the poll table using the queue callback */
--
Eric Wong
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists