[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20091110155316.2c3d7b6e@nehalam>
Date: Tue, 10 Nov 2009 15:53:16 -0800
From: Stephen Hemminger <shemminger@...tta.com>
To: karaluh@...aluh.pl, Ed Cashin <ecashin@...aid.com>
Cc: roel.kluin@...il.com, harvey.harrison@...il.com,
bzolnier@...il.com, netdev@...r.kernel.org
Subject: Re: [PATCH 04/10] AOE: use rcu to find network device
On Tue, 10 Nov 2009 15:06:17 -0800
Stephen Hemminger <shemminger@...tta.com> wrote:
> On Tue, 10 Nov 2009 15:01:49 -0500
> Ed Cashin <ecashin@...aid.com> wrote:
>
> > On Tue Nov 10 13:07:37 EST 2009, shemminger@...tta.com wrote:
> > > This gets rid of another use of read_lock(&dev_base_lock) by using
> > > RCU. Also, it only increments the reference count of the device actually
> > > used rather than holding and releasing every device
> > >
> > > Compile tested only.
> >
> > This function runs once a minute when the aoe driver is loaded,
> > if you'd like to test it a bit more.
> >
> > It looks like there's no dev_put corresponding to the dev_hold
> > after the changes.
> >
>
> Hmm, looks like AOE actually is not ref counting the network device.
> So my patch is incorrect.
>
> As it stands (before my patch), it is UNSAFE. It can decide to queue
> packets to a device that is removed out from underneath it causing
> reference to freed memory.
>
> Moving the rcu_read_lock up to aoecmd_cfg() would solve that but the
> whole driver appears to be unsafe about device refcounting and handling
> device removal properly.
>
> It needs to:
>
> 1. Get a device ref count when it remembers a device: (ie addif)
> 2. Install a notifier that looks for device removal events
> 3. In notifier, remove interface, including flushing all pending
> skb's for that device.
>
> This obviously is beyond the scope of the RCU stuff.
Here is a patch to get you going, it does compile but it probably
won't work because the code doesn't handle the case of the last
device going away from a target. This is yet another pre-existing
bug, since if a timeout happens: ejectif() is called to remove a device,
resend() will BUG in ifrotate() if all devices are gone.
---
drivers/block/aoe/aoe.h | 2 ++
drivers/block/aoe/aoecmd.c | 19 +++++++++++++++++++
drivers/block/aoe/aoedev.c | 14 ++++++++++++++
drivers/block/aoe/aoemain.c | 24 ++++++++++++++++++++++++
4 files changed, 59 insertions(+)
--- a/drivers/block/aoe/aoecmd.c 2009-11-10 15:13:25.673859220 -0800
+++ b/drivers/block/aoe/aoecmd.c 2009-11-10 15:49:20.009047132 -0800
@@ -413,6 +413,8 @@ addif(struct aoetgt *t, struct net_devic
p = getif(t, NULL);
if (!p)
return NULL;
+
+ dev_hold(nd);
p->nd = nd;
p->maxbcnt = DEFAULTBCNT;
p->lost = 0;
@@ -424,12 +426,29 @@ static void
ejectif(struct aoetgt *t, struct aoeif *ifp)
{
struct aoeif *e;
+ struct net_device *nd;
ulong n;
e = t->ifs + NAOEIFS - 1;
+ nd = e->nd;
n = (e - ifp) * sizeof *ifp;
memmove(ifp, ifp+1, n);
e->nd = NULL;
+ dev_put(nd);
+}
+
+void aoecmd_flushnet(struct aoedev *d, struct net_device *nd)
+{
+ struct aoetgt **tt, **te;
+ tt = d->targets;
+ te = tt + NTARGETS;
+ for (; tt < te && *tt; tt++) {
+ struct aoetgt *t = *tt;
+ struct aoeif *ifp;
+
+ while ( (ifp = getif(t, nd)) )
+ ejectif(t, ifp);
+ }
}
static int
--- a/drivers/block/aoe/aoemain.c 2009-11-10 15:13:25.696859195 -0800
+++ b/drivers/block/aoe/aoemain.c 2009-11-10 15:48:43.352047188 -0800
@@ -8,6 +8,8 @@
#include <linux/blkdev.h>
#include <linux/module.h>
#include <linux/skbuff.h>
+#include <linux/notifier.h>
+#include <linux/netdevice.h>
#include "aoe.h"
MODULE_LICENSE("GPL");
@@ -54,11 +56,28 @@ discover_timer(ulong vp)
}
}
+/* Callback on change of state of network device. */
+static int aoe_device_event(struct notifier_block *unused,
+ unsigned long event, void *ptr)
+{
+ struct net_device *nd = ptr;
+
+ if (is_aoe_netif(nd) && event == NETDEV_UNREGISTER)
+ aoedev_ejectnet(nd);
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block aoe_notifier = {
+ .notifier_call = aoe_device_event,
+};
+
static void
aoe_exit(void)
{
discover_timer(TKILL);
+ unregister_netdevice_notifier(&aoe_notifier);
aoenet_exit();
unregister_blkdev(AOE_MAJOR, DEVICE_NAME);
aoechr_exit();
@@ -83,6 +102,9 @@ aoe_init(void)
ret = aoenet_init();
if (ret)
goto net_fail;
+ ret = register_netdevice_notifier(&aoe_notifier);
+ if (ret)
+ goto notifier_fail;
ret = register_blkdev(AOE_MAJOR, DEVICE_NAME);
if (ret < 0) {
printk(KERN_ERR "aoe: can't register major\n");
@@ -94,6 +116,8 @@ aoe_init(void)
return 0;
blkreg_fail:
+ unregister_netdevice_notifier(&aoe_notifier);
+ notifier_fail:
aoenet_exit();
net_fail:
aoeblk_exit();
--- a/drivers/block/aoe/aoe.h 2009-11-10 15:36:07.775921768 -0800
+++ b/drivers/block/aoe/aoe.h 2009-11-10 15:43:14.972984754 -0800
@@ -186,6 +186,7 @@ void aoecmd_ata_rsp(struct sk_buff *);
void aoecmd_cfg_rsp(struct sk_buff *);
void aoecmd_sleepwork(struct work_struct *);
void aoecmd_cleanslate(struct aoedev *);
+void aoecmd_flushnet(struct aoedev *, struct net_device *);
struct sk_buff *aoecmd_ata_id(struct aoedev *);
int aoedev_init(void);
@@ -194,6 +195,7 @@ struct aoedev *aoedev_by_aoeaddr(int maj
struct aoedev *aoedev_by_sysminor_m(ulong sysminor);
void aoedev_downdev(struct aoedev *d);
int aoedev_flush(const char __user *str, size_t size);
+void aoedev_ejectnet(struct net_device *);
int aoenet_init(void);
void aoenet_exit(void);
--- a/drivers/block/aoe/aoedev.c 2009-11-10 15:13:25.685859893 -0800
+++ b/drivers/block/aoe/aoedev.c 2009-11-10 15:46:19.430861404 -0800
@@ -162,6 +162,20 @@ aoedev_flush(const char __user *str, siz
return 0;
}
+void aoedev_ejectnet(struct net_device *nd)
+{
+ struct aoedev *d;
+ unsigned long flags;
+
+ spin_lock_irqsave(&devlist_lock, flags);
+ for (d = devlist; d; d = d->next) {
+ spin_lock(&d->lock);
+ aoecmd_flushnet(d, nd);
+ spin_unlock(&d->lock);
+ }
+ spin_unlock_irqrestore(&d->lock, flags);
+}
+
/* I'm not really sure that this is a realistic problem, but if the
network driver goes gonzo let's just leak memory after complaining. */
static void
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists