[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <1994320084.165913.1313302071630.JavaMail.root@zmail05.collab.prod.int.phx2.redhat.com>
Date: Sun, 14 Aug 2011 02:07:51 -0400 (EDT)
From: Jason Wang <jasowang@...hat.com>
To: paulmck@...ux.vnet.ibm.com
Cc: mst@...hat.com, netdev@...r.kernel.org,
linux-kernel@...r.kernel.org,
virtualization@...ts.linux-foundation.org, davem@...emloft.net,
krkumar2@...ibm.com, rusty@...tcorp.com.au, qemu-devel@...gnu.org,
kvm@...r.kernel.org, mirq-linux@...e.qmqm.pl
Subject: Re: [net-next RFC PATCH 4/7] tuntap: multiqueue support
----- Original Message -----
> On Fri, Aug 12, 2011 at 09:55:20AM +0800, Jason Wang wrote:
> > With the abstraction that each socket were a backend of a
> > queue for userspace, this patch adds multiqueue support for
> > tap device by allowing multiple sockets to be attached to a
> > tap device. Then we could parallize the transmission by put
> > them into different socket.
> >
> > As queue related information were stored in private_data of
> > file new, we could simply implement the multiqueue support
> > by add an array of pointers to sockets were stored in the
> > tap device. Then ioctls may be added to manipulate those
> > pointers for adding or removing queues.
> >
> > In order to let tx path lockless, NETIF_F_LLTX were used for
> > multiqueue tap device. And RCU is used for doing
> > synchronization between packet handling and system calls
> > such as removing queues.
> >
> > Currently, multiqueue support is limited for tap , but it's
> > easy also enable it for tun if we find it was also helpful.
>
> Question below about calls to tun_get_slot().
>
> Thanx, Paul
>
> > Signed-off-by: Jason Wang <jasowang@...hat.com>
> > ---
> > drivers/net/tun.c | 376
> > ++++++++++++++++++++++++++++++++++-------------------
> > 1 files changed, 243 insertions(+), 133 deletions(-)
> >
> > diff --git a/drivers/net/tun.c b/drivers/net/tun.c
> > index 4cd292a..8bc6dff 100644
> > --- a/drivers/net/tun.c
> > +++ b/drivers/net/tun.c
> > @@ -108,6 +108,8 @@ struct tap_filter {
> > unsigned char addr[FLT_EXACT_COUNT][ETH_ALEN];
> > };
> >
> > +#define MAX_TAP_QUEUES (NR_CPUS < 16 ? NR_CPUS : 16)
> > +
> > struct tun_file {
> > struct sock sk;
> > struct socket socket;
> > @@ -115,7 +117,7 @@ struct tun_file {
> > int vnet_hdr_sz;
> > struct tap_filter txflt;
> > atomic_t count;
> > - struct tun_struct *tun;
> > + struct tun_struct __rcu *tun;
> > struct net *net;
> > struct fasync_struct *fasync;
> > unsigned int flags;
> > @@ -124,7 +126,8 @@ struct tun_file {
> > struct tun_sock;
> >
> > struct tun_struct {
> > - struct tun_file *tfile;
> > + struct tun_file *tfiles[MAX_TAP_QUEUES];
> > + unsigned int numqueues;
> > unsigned int flags;
> > uid_t owner;
> > gid_t group;
> > @@ -139,80 +142,183 @@ struct tun_struct {
> > #endif
> > };
> >
> > -static int tun_attach(struct tun_struct *tun, struct file *file)
> > +static DEFINE_SPINLOCK(tun_lock);
> > +
> > +/*
> > + * get_slot: return a [unused/occupied] slot in tun->tfiles[]:
> > + * - if 'f' is NULL, return the first empty slot;
> > + * - otherwise, return the slot this pointer occupies.
> > + */
> > +static int tun_get_slot(struct tun_struct *tun, struct tun_file
> > *tfile)
> > {
> > - struct tun_file *tfile = file->private_data;
> > - int err;
> > + int i;
> >
> > - ASSERT_RTNL();
> > + for (i = 0; i < MAX_TAP_QUEUES; i++) {
> > + if (rcu_dereference(tun->tfiles[i]) == tfile)
> > + return i;
> > + }
> >
> > - netif_tx_lock_bh(tun->dev);
> > + /* Should never happen */
> > + BUG_ON(1);
> > +}
> >
> > - err = -EINVAL;
> > - if (tfile->tun)
> > - goto out;
> > +/*
> > + * tun_get_queue(): calculate the queue index
> > + * - if skbs comes from mq nics, we can just borrow
> > + * - if not, calculate from the hash
> > + */
> > +static struct tun_file *tun_get_queue(struct net_device *dev,
> > + struct sk_buff *skb)
> > +{
> > + struct tun_struct *tun = netdev_priv(dev);
> > + struct tun_file *tfile = NULL;
> > + int numqueues = tun->numqueues;
> > + __u32 rxq;
> >
> > - err = -EBUSY;
> > - if (tun->tfile)
> > + BUG_ON(!rcu_read_lock_held());
> > +
> > + if (!numqueues)
> > goto out;
> >
> > - err = 0;
> > - tfile->tun = tun;
> > - tun->tfile = tfile;
> > - netif_carrier_on(tun->dev);
> > - dev_hold(tun->dev);
> > - sock_hold(&tfile->sk);
> > - atomic_inc(&tfile->count);
> > + if (likely(skb_rx_queue_recorded(skb))) {
> > + rxq = skb_get_rx_queue(skb);
> > +
> > + while (unlikely(rxq >= numqueues))
> > + rxq -= numqueues;
> > +
> > + tfile = rcu_dereference(tun->tfiles[rxq]);
> > + if (tfile)
> > + goto out;
> > + }
> > +
> > + /* Check if we can use flow to select a queue */
> > + rxq = skb_get_rxhash(skb);
> > + if (rxq) {
> > + tfile = rcu_dereference(tun->tfiles[rxq % numqueues]);
> > + if (tfile)
> > + goto out;
> > + }
> > +
> > + /* Everything failed - find first available queue */
> > + for (rxq = 0; rxq < MAX_TAP_QUEUES; rxq++) {
> > + tfile = rcu_dereference(tun->tfiles[rxq]);
> > + if (tfile)
> > + break;
> > + }
> >
> > out:
> > - netif_tx_unlock_bh(tun->dev);
> > - return err;
> > + return tfile;
> > }
> >
> > -static void __tun_detach(struct tun_struct *tun)
> > +static int tun_detach(struct tun_file *tfile, bool clean)
> > {
> > - struct tun_file *tfile = tun->tfile;
> > - /* Detach from net device */
> > - netif_tx_lock_bh(tun->dev);
> > - netif_carrier_off(tun->dev);
> > - tun->tfile = NULL;
> > - netif_tx_unlock_bh(tun->dev);
> > -
> > - /* Drop read queue */
> > - skb_queue_purge(&tfile->socket.sk->sk_receive_queue);
> > -
> > - /* Drop the extra count on the net device */
> > - dev_put(tun->dev);
> > -}
> > + struct tun_struct *tun;
> > + struct net_device *dev = NULL;
> > + bool destroy = false;
> >
> > -static void tun_detach(struct tun_struct *tun)
> > -{
> > - rtnl_lock();
> > - __tun_detach(tun);
> > - rtnl_unlock();
> > -}
> > + spin_lock(&tun_lock);
> >
> > -static struct tun_struct *__tun_get(struct tun_file *tfile)
> > -{
> > - struct tun_struct *tun = NULL;
> > + tun = rcu_dereference_protected(tfile->tun,
> > + lockdep_is_held(&tun_lock));
> > + if (tun) {
> > + int index = tun_get_slot(tun, tfile);
>
> Don't we need to be in an RCU read-side critical section in order to
> safely call tun_get_slot()?
>
> Or is the fact that we are calling with tun_lock held sufficient?
> If the latter, then the rcu_dereference() in tun_get_slot() should
> use rcu_dereference_protected() rather than rcu_dereference().
>
Nice catch. The latter, tun_lock held is sufficient. Thanks.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists