lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Sun, 14 Aug 2011 02:07:51 -0400 (EDT)
From:	Jason Wang <jasowang@...hat.com>
To:	paulmck@...ux.vnet.ibm.com
Cc:	mst@...hat.com, netdev@...r.kernel.org,
	linux-kernel@...r.kernel.org,
	virtualization@...ts.linux-foundation.org, davem@...emloft.net,
	krkumar2@...ibm.com, rusty@...tcorp.com.au, qemu-devel@...gnu.org,
	kvm@...r.kernel.org, mirq-linux@...e.qmqm.pl
Subject: Re: [net-next RFC PATCH 4/7] tuntap: multiqueue support



----- Original Message -----
> On Fri, Aug 12, 2011 at 09:55:20AM +0800, Jason Wang wrote:
> > With the abstraction that each socket were a backend of a
> > queue for userspace, this patch adds multiqueue support for
> > tap device by allowing multiple sockets to be attached to a
> > tap device. Then we could parallize the transmission by put
> > them into different socket.
> >
> > As queue related information were stored in private_data of
> > file new, we could simply implement the multiqueue support
> > by add an array of pointers to sockets were stored in the
> > tap device. Then ioctls may be added to manipulate those
> > pointers for adding or removing queues.
> >
> > In order to let tx path lockless, NETIF_F_LLTX were used for
> > multiqueue tap device. And RCU is used for doing
> > synchronization between packet handling and system calls
> > such as removing queues.
> >
> > Currently, multiqueue support is limited for tap , but it's
> > easy also enable it for tun if we find it was also helpful.
> 
> Question below about calls to tun_get_slot().
> 
> Thanx, Paul
> 
> > Signed-off-by: Jason Wang <jasowang@...hat.com>
> > ---
> >  drivers/net/tun.c | 376
> >  ++++++++++++++++++++++++++++++++++-------------------
> >  1 files changed, 243 insertions(+), 133 deletions(-)
> >
> > diff --git a/drivers/net/tun.c b/drivers/net/tun.c
> > index 4cd292a..8bc6dff 100644
> > --- a/drivers/net/tun.c
> > +++ b/drivers/net/tun.c
> > @@ -108,6 +108,8 @@ struct tap_filter {
> >  	unsigned char addr[FLT_EXACT_COUNT][ETH_ALEN];
> >  };
> >
> > +#define MAX_TAP_QUEUES (NR_CPUS < 16 ? NR_CPUS : 16)
> > +
> >  struct tun_file {
> >  	struct sock sk;
> >  	struct socket socket;
> > @@ -115,7 +117,7 @@ struct tun_file {
> >  	int vnet_hdr_sz;
> >  	struct tap_filter txflt;
> >  	atomic_t count;
> > - struct tun_struct *tun;
> > + struct tun_struct __rcu *tun;
> >  	struct net *net;
> >  	struct fasync_struct *fasync;
> >  	unsigned int flags;
> > @@ -124,7 +126,8 @@ struct tun_file {
> >  struct tun_sock;
> >
> >  struct tun_struct {
> > - struct tun_file *tfile;
> > + struct tun_file *tfiles[MAX_TAP_QUEUES];
> > + unsigned int numqueues;
> >  	unsigned int flags;
> >  	uid_t owner;
> >  	gid_t group;
> > @@ -139,80 +142,183 @@ struct tun_struct {
> >  #endif
> >  };
> >
> > -static int tun_attach(struct tun_struct *tun, struct file *file)
> > +static DEFINE_SPINLOCK(tun_lock);
> > +
> > +/*
> > + * get_slot: return a [unused/occupied] slot in tun->tfiles[]:
> > + * - if 'f' is NULL, return the first empty slot;
> > + * - otherwise, return the slot this pointer occupies.
> > + */
> > +static int tun_get_slot(struct tun_struct *tun, struct tun_file
> > *tfile)
> >  {
> > - struct tun_file *tfile = file->private_data;
> > - int err;
> > + int i;
> >
> > - ASSERT_RTNL();
> > + for (i = 0; i < MAX_TAP_QUEUES; i++) {
> > + if (rcu_dereference(tun->tfiles[i]) == tfile)
> > + return i;
> > + }
> >
> > - netif_tx_lock_bh(tun->dev);
> > + /* Should never happen */
> > + BUG_ON(1);
> > +}
> >
> > - err = -EINVAL;
> > - if (tfile->tun)
> > - goto out;
> > +/*
> > + * tun_get_queue(): calculate the queue index
> > + * - if skbs comes from mq nics, we can just borrow
> > + * - if not, calculate from the hash
> > + */
> > +static struct tun_file *tun_get_queue(struct net_device *dev,
> > + struct sk_buff *skb)
> > +{
> > + struct tun_struct *tun = netdev_priv(dev);
> > + struct tun_file *tfile = NULL;
> > + int numqueues = tun->numqueues;
> > + __u32 rxq;
> >
> > - err = -EBUSY;
> > - if (tun->tfile)
> > + BUG_ON(!rcu_read_lock_held());
> > +
> > + if (!numqueues)
> >  		goto out;
> >
> > - err = 0;
> > - tfile->tun = tun;
> > - tun->tfile = tfile;
> > - netif_carrier_on(tun->dev);
> > - dev_hold(tun->dev);
> > - sock_hold(&tfile->sk);
> > - atomic_inc(&tfile->count);
> > + if (likely(skb_rx_queue_recorded(skb))) {
> > + rxq = skb_get_rx_queue(skb);
> > +
> > + while (unlikely(rxq >= numqueues))
> > + rxq -= numqueues;
> > +
> > + tfile = rcu_dereference(tun->tfiles[rxq]);
> > + if (tfile)
> > + goto out;
> > + }
> > +
> > + /* Check if we can use flow to select a queue */
> > + rxq = skb_get_rxhash(skb);
> > + if (rxq) {
> > + tfile = rcu_dereference(tun->tfiles[rxq % numqueues]);
> > + if (tfile)
> > + goto out;
> > + }
> > +
> > + /* Everything failed - find first available queue */
> > + for (rxq = 0; rxq < MAX_TAP_QUEUES; rxq++) {
> > + tfile = rcu_dereference(tun->tfiles[rxq]);
> > + if (tfile)
> > + break;
> > + }
> >
> >  out:
> > - netif_tx_unlock_bh(tun->dev);
> > - return err;
> > + return tfile;
> >  }
> >
> > -static void __tun_detach(struct tun_struct *tun)
> > +static int tun_detach(struct tun_file *tfile, bool clean)
> >  {
> > - struct tun_file *tfile = tun->tfile;
> > - /* Detach from net device */
> > - netif_tx_lock_bh(tun->dev);
> > - netif_carrier_off(tun->dev);
> > - tun->tfile = NULL;
> > - netif_tx_unlock_bh(tun->dev);
> > -
> > - /* Drop read queue */
> > - skb_queue_purge(&tfile->socket.sk->sk_receive_queue);
> > -
> > - /* Drop the extra count on the net device */
> > - dev_put(tun->dev);
> > -}
> > + struct tun_struct *tun;
> > + struct net_device *dev = NULL;
> > + bool destroy = false;
> >
> > -static void tun_detach(struct tun_struct *tun)
> > -{
> > - rtnl_lock();
> > - __tun_detach(tun);
> > - rtnl_unlock();
> > -}
> > + spin_lock(&tun_lock);
> >
> > -static struct tun_struct *__tun_get(struct tun_file *tfile)
> > -{
> > - struct tun_struct *tun = NULL;
> > + tun = rcu_dereference_protected(tfile->tun,
> > + lockdep_is_held(&tun_lock));
> > + if (tun) {
> > + int index = tun_get_slot(tun, tfile);
> 
> Don't we need to be in an RCU read-side critical section in order to
> safely call tun_get_slot()?
> 
> Or is the fact that we are calling with tun_lock held sufficient?
> If the latter, then the rcu_dereference() in tun_get_slot() should
> use rcu_dereference_protected() rather than rcu_dereference().
> 

Nice catch. The latter, tun_lock held is sufficient. Thanks.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ