Message-ID: <AANLkTim=9JfQQ__Pii0c8XUvYFSLmj_gtK77twJ9iAbR@mail.gmail.com>
Date: Tue, 14 Dec 2010 07:42:37 +0800
From: Changli Gao <xiaosuo@...il.com>
To: Eric Dumazet <eric.dumazet@...il.com>
Cc: Jamal Hadi Salim <hadi@...erus.ca>,
"David S. Miller" <davem@...emloft.net>, netdev@...r.kernel.org
Subject: Re: [PATCH 4/5] ifb: add multiqueue support
On Tue, Dec 14, 2010 at 12:26 AM, Eric Dumazet <eric.dumazet@...il.com> wrote:
> On Monday, December 13, 2010 at 22:43 +0800, Changli Gao wrote:
>> Each ifb NIC has nr_cpu_ids rx queues and nr_cpu_ids tx queues. Packets
>> transmitted to ifb are enqueued to the corresponding per-cpu tx queues,
>> and processed later in the corresponding per-cpu tasklet.
>>
>> The stats are converted to u64.
>>
>> tq is now a stack variable. This makes ifb_q_private smaller, and the
>> tx queue is locked only once in ri_tasklet.
>>
>
> Might be good to say that the tx_queue_len is multiplied by the number
> of online cpus ;)
Thanks, I'll mention that in the changelog.
>
>> Signed-off-by: Changli Gao <xiaosuo@...il.com>
>> ---
>> drivers/net/ifb.c | 211 ++++++++++++++++++++++++++++++++++++------------------
>> 1 file changed, 141 insertions(+), 70 deletions(-)
>> diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c
>> index 57c5cfb..16c767b 100644
>> --- a/drivers/net/ifb.c
>> +++ b/drivers/net/ifb.c
>> @@ -37,56 +37,63 @@
>> #include <net/net_namespace.h>
>>
>> #define TX_Q_LIMIT 32
>> +struct ifb_q_private {
>> + struct tasklet_struct ifb_tasklet;
>> + struct sk_buff_head rq;
>> + struct u64_stats_sync syncp;
>> + u64 rx_packets;
>> + u64 rx_bytes;
>> + u64 rx_dropped;
>> +};
>> +
>> struct ifb_private {
>> - struct tasklet_struct ifb_tasklet;
>> - int tasklet_pending;
>> - struct sk_buff_head rq;
>> - struct sk_buff_head tq;
>> + struct ifb_q_private __percpu *q;
>
> You probably could use dev->ml_priv (lstats/tstats/dstats)
> so that ifb_private just disappears (we save a full cache line)
Good idea. Thanks.
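Something like this, I guess (untested sketch, modulo the sparse
__percpu annotations on the casts through the plain void *ml_priv):

static int ifb_init(struct net_device *dev)
{
	struct ifb_q_private __percpu *q = alloc_percpu(struct ifb_q_private);

	if (!q)
		return -ENOMEM;
	/* per-cpu tasklet/rq init stays as in this patch */
	dev->ml_priv = q;
	return 0;
}

and in the fast paths:

	struct ifb_q_private *qp =
		this_cpu_ptr((struct ifb_q_private __percpu *)dev->ml_priv);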
>
>> };
>>
>> static int numifbs = 2;
>>
>> -static void ri_tasklet(unsigned long dev)
>> +static void ri_tasklet(unsigned long _dev)
>> {
>> -
>> - struct net_device *_dev = (struct net_device *)dev;
>> - struct ifb_private *dp = netdev_priv(_dev);
>> - struct net_device_stats *stats = &_dev->stats;
>> + struct net_device *dev = (struct net_device *)_dev;
>> + struct ifb_private *p = netdev_priv(dev);
>> + struct ifb_q_private *qp;
>> struct netdev_queue *txq;
>> struct sk_buff *skb;
>> -
>> - txq = netdev_get_tx_queue(_dev, 0);
>> - skb = skb_peek(&dp->tq);
>> - if (skb == NULL) {
>> - if (__netif_tx_trylock(txq)) {
>> - skb_queue_splice_tail_init(&dp->rq, &dp->tq);
>> - __netif_tx_unlock(txq);
>> - } else {
>> - /* reschedule */
>> - goto resched;
>> - }
>> + struct sk_buff_head tq;
>> +
>> + __skb_queue_head_init(&tq);
>> + txq = netdev_get_tx_queue(dev, raw_smp_processor_id());
>> + qp = per_cpu_ptr(p->q, raw_smp_processor_id());
>
> qp = this_cpu_ptr(dev->ifb_qp); is faster
and shorter, thanks. :)
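i.e. something like:

-	qp = per_cpu_ptr(p->q, raw_smp_processor_id());
+	qp = this_cpu_ptr(p->q);

(safe here since tasklets run in softirq context with preemption
disabled, so we cannot migrate between the lookup and the use.)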
>
>> + if (!__netif_tx_trylock(txq)) {
>> + tasklet_schedule(&qp->ifb_tasklet);
>> + return;
>> }
>> + skb_queue_splice_tail_init(&qp->rq, &tq);
>> + if (netif_tx_queue_stopped(txq))
>> + netif_tx_wake_queue(txq);
>> + __netif_tx_unlock(txq);
>>
>> - while ((skb = skb_dequeue(&dp->tq)) != NULL) {
>> + while ((skb = __skb_dequeue(&tq)) != NULL) {
>> u32 from = G_TC_FROM(skb->tc_verd);
>>
>> skb->tc_verd = 0;
>> skb->tc_verd = SET_TC_NCLS(skb->tc_verd);
>> - stats->tx_packets++;
>> - stats->tx_bytes += skb->len;
>> + u64_stats_update_begin(&qp->syncp);
>> + txq->tx_packets++;
>> + txq->tx_bytes += skb->len;
>>
>> rcu_read_lock();
>> skb->dev = dev_get_by_index_rcu(&init_net, skb->skb_iif);
>> if (!skb->dev) {
>> rcu_read_unlock();
>> + txq->tx_dropped++;
>> + u64_stats_update_end(&qp->syncp);
>> dev_kfree_skb(skb);
>> - stats->tx_dropped++;
>> - if (skb_queue_len(&dp->tq) != 0)
>> - goto resched;
>> - break;
>> + continue;
>> }
>> rcu_read_unlock();
>> - skb->skb_iif = _dev->ifindex;
>> + u64_stats_update_end(&qp->syncp);
>> + skb->skb_iif = dev->ifindex;
>
> Why is this necessary ? shouldnt skb->skb_iif already be dev->ifindex ?
No. In fact, act_mirred uses skb_iif to save the original dev:
skb2->skb_iif = skb->dev->ifindex;
skb2->dev = dev;
>
>>
>> if (from & AT_EGRESS) {
>> dev_queue_xmit(skb);
>> @@ -96,48 +103,32 @@ static void ri_tasklet(unsigned long dev)
>> } else
>> BUG();
>> }
>> -
>> - if (__netif_tx_trylock(txq)) {
>> - skb = skb_peek(&dp->rq);
>> - if (skb == NULL) {
>> - dp->tasklet_pending = 0;
>> - if (netif_queue_stopped(_dev))
>> - netif_wake_queue(_dev);
>> - } else {
>> - __netif_tx_unlock(txq);
>> - goto resched;
>> - }
>> - __netif_tx_unlock(txq);
>> - } else {
>> -resched:
>> - dp->tasklet_pending = 1;
>> - tasklet_schedule(&dp->ifb_tasklet);
>> - }
>> -
>> }
>>
>> static netdev_tx_t ifb_xmit(struct sk_buff *skb, struct net_device *dev)
>> {
>> - struct ifb_private *dp = netdev_priv(dev);
>> - struct net_device_stats *stats = &dev->stats;
>> + struct ifb_private *p = netdev_priv(dev);
>> + struct ifb_q_private *qp = per_cpu_ptr(p->q,
>> + skb_get_queue_mapping(skb));
>
> Would be good to add a
>
> WARN_ON(skb_get_queue_mapping(skb) != smp_processor_id());
I think it may be what Jamal wants too. Thanks.
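Something like this at the top of ifb_xmit() then (sketch; it assumes
the queue mapping is always the xmitting cpu, which is what the
per-cpu queue selection intends):

	/* each cpu owns exactly one tx queue, so the mapping must match */
	WARN_ON(skb_get_queue_mapping(skb) != smp_processor_id());
	qp = per_cpu_ptr(p->q, skb_get_queue_mapping(skb));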
>
>
>> u32 from = G_TC_FROM(skb->tc_verd);
>>
>> - stats->rx_packets++;
>> - stats->rx_bytes += skb->len;
>> + u64_stats_update_begin(&qp->syncp);
>> + qp->rx_packets++;
>> + qp->rx_bytes += skb->len;
>>
>> if (!(from & (AT_INGRESS|AT_EGRESS)) || !skb->skb_iif) {
>> + qp->rx_dropped++;
>> + u64_stats_update_end(&qp->syncp);
>> dev_kfree_skb(skb);
>> - stats->rx_dropped++;
>> return NETDEV_TX_OK;
>> }
>> + u64_stats_update_end(&qp->syncp);
>>
>> - __skb_queue_tail(&dp->rq, skb);
>> - if (!dp->tasklet_pending) {
>> - dp->tasklet_pending = 1;
>> - tasklet_schedule(&dp->ifb_tasklet);
>> - }
>> + __skb_queue_tail(&qp->rq, skb);
>> + if (skb_queue_len(&qp->rq) == 1)
>> + tasklet_schedule(&qp->ifb_tasklet);
>>
>> - if (skb_queue_len(&dp->rq) >= dev->tx_queue_len)
>> + if (skb_queue_len(&qp->rq) >= dev->tx_queue_len)
>
> This seems wrong...
> You need to change to netif_tx_stop_queue(txq)
Thanks for catching this. I missed it.
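i.e. something like (sketch):

	struct netdev_queue *txq;

	txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
	if (skb_queue_len(&qp->rq) >= dev->tx_queue_len)
		netif_tx_stop_queue(txq);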
>
>> netif_stop_queue(dev);
>>
>> return NETDEV_TX_OK;
>> @@ -145,33 +136,103 @@ static netdev_tx_t ifb_xmit(struct sk_buff *skb, struct net_device *dev)
>>
>> static int ifb_close(struct net_device *dev)
>> {
>> - struct ifb_private *dp = netdev_priv(dev);
>> -
>> - tasklet_kill(&dp->ifb_tasklet);
>> - netif_stop_queue(dev);
>> - __skb_queue_purge(&dp->rq);
>> - __skb_queue_purge(&dp->tq);
>> + struct ifb_private *p = netdev_priv(dev);
>> + struct ifb_q_private *qp;
>> + int cpu;
>> +
>> + for_each_possible_cpu(cpu) {
>> + qp = per_cpu_ptr(p->q, cpu);
>> + tasklet_kill(&qp->ifb_tasklet);
>> + netif_tx_stop_queue(netdev_get_tx_queue(dev, cpu));
>> + __skb_queue_purge(&qp->rq);
>> + }
>>
>> return 0;
>> }
>>
>> static int ifb_open(struct net_device *dev)
>> {
>> - struct ifb_private *dp = netdev_priv(dev);
>> + int cpu;
>> +
>
>
>
>> + for_each_possible_cpu(cpu)
>> + netif_tx_start_queue(netdev_get_tx_queue(dev, cpu));
>> +
>> + return 0;
>> +}
>> +
>> +static int ifb_init(struct net_device *dev)
>> +{
>> + struct ifb_private *p = netdev_priv(dev);
>> + struct ifb_q_private *q;
>> + int cpu;
>>
>> - tasklet_init(&dp->ifb_tasklet, ri_tasklet, (unsigned long)dev);
>> - __skb_queue_head_init(&dp->rq);
>> - __skb_queue_head_init(&dp->tq);
>> - netif_start_queue(dev);
>> + p->q = alloc_percpu(struct ifb_q_private);
>> + if (!p->q)
>> + return -ENOMEM;
>
> Hmm, maybe use netdev_queue_numa_node_write() somewhere, so that
> qdisc_alloc() can use NUMA affinities.
I'll add it, thanks.
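e.g. (untested; cpu_to_node() gives the home node for each cpu's tx
queue):

	for_each_possible_cpu(cpu)
		netdev_queue_numa_node_write(netdev_get_tx_queue(dev, cpu),
					     cpu_to_node(cpu));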
--
Regards,
Changli Gao(xiaosuo@...il.com)