Message-ID: <AANLkTim=9JfQQ__Pii0c8XUvYFSLmj_gtK77twJ9iAbR@mail.gmail.com>
Date: Tue, 14 Dec 2010 07:42:37 +0800
From: Changli Gao <xiaosuo@...il.com>
To: Eric Dumazet <eric.dumazet@...il.com>
Cc: Jamal Hadi Salim <hadi@...erus.ca>,
"David S. Miller" <davem@...emloft.net>, netdev@...r.kernel.org
Subject: Re: [PATCH 4/5] ifb: add multiqueue support
On Tue, Dec 14, 2010 at 12:26 AM, Eric Dumazet <eric.dumazet@...il.com> wrote:
> On Monday, December 13, 2010 at 22:43 +0800, Changli Gao wrote:
>> Each ifb NIC has nr_cpu_ids rx queues and nr_cpu_ids tx queues. Packets
>> transmitted to ifb are enqueued to the corresponding per-cpu tx queues,
>> and processed later in the corresponding per-cpu tasklet.
>>
>> The stats are converted to u64.
>>
>> tq is now a stack variable. This makes ifb_q_private smaller, and the
>> tx queue is locked only once in ri_tasklet.
>>
>
> Might be good to say that the tx_queue_len is multiplied by the number
> of online cpus ;)
Thanks, I'll mention that in the changelog.
>
>> Signed-off-by: Changli Gao <xiaosuo@...il.com>
>> ---
>> drivers/net/ifb.c | 211 ++++++++++++++++++++++++++++++++++++------------------
>> 1 file changed, 141 insertions(+), 70 deletions(-)
>> diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c
>> index 57c5cfb..16c767b 100644
>> --- a/drivers/net/ifb.c
>> +++ b/drivers/net/ifb.c
>> @@ -37,56 +37,63 @@
>> #include <net/net_namespace.h>
>>
>> #define TX_Q_LIMIT 32
>> +struct ifb_q_private {
>> + struct tasklet_struct ifb_tasklet;
>> + struct sk_buff_head rq;
>> + struct u64_stats_sync syncp;
>> + u64 rx_packets;
>> + u64 rx_bytes;
>> + u64 rx_dropped;
>> +};
>> +
>> struct ifb_private {
>> - struct tasklet_struct ifb_tasklet;
>> - int tasklet_pending;
>> - struct sk_buff_head rq;
>> - struct sk_buff_head tq;
>> + struct ifb_q_private __percpu *q;
>
> You probably could use dev->ml_priv (lstats/tstats/dstats)
> so that ifb_private just disappears (we save a full cache line)
Good idea. Thanks.
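Something like this, I guess (untested sketch, modulo the sparse
__percpu annotations on the casts through the plain void *ml_priv):

static int ifb_init(struct net_device *dev)
{
	struct ifb_q_private __percpu *q = alloc_percpu(struct ifb_q_private);

	if (!q)
		return -ENOMEM;
	/* per-cpu tasklet/rq init stays as in this patch */
	dev->ml_priv = q;
	return 0;
}

and in the fast paths:

	struct ifb_q_private *qp =
		this_cpu_ptr((struct ifb_q_private __percpu *)dev->ml_priv);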
>
>> };
>>
>> static int numifbs = 2;
>>
>> -static void ri_tasklet(unsigned long dev)
>> +static void ri_tasklet(unsigned long _dev)
>> {
>> -
>> - struct net_device *_dev = (struct net_device *)dev;
>> - struct ifb_private *dp = netdev_priv(_dev);
>> - struct net_device_stats *stats = &_dev->stats;
>> + struct net_device *dev = (struct net_device *)_dev;
>> + struct ifb_private *p = netdev_priv(dev);
>> + struct ifb_q_private *qp;
>> struct netdev_queue *txq;
>> struct sk_buff *skb;
>> -
>> - txq = netdev_get_tx_queue(_dev, 0);
>> - skb = skb_peek(&dp->tq);
>> - if (skb == NULL) {
>> - if (__netif_tx_trylock(txq)) {
>> - skb_queue_splice_tail_init(&dp->rq, &dp->tq);
>> - __netif_tx_unlock(txq);
>> - } else {
>> - /* reschedule */
>> - goto resched;
>> - }
>> + struct sk_buff_head tq;
>> +
>> + __skb_queue_head_init(&tq);
>> + txq = netdev_get_tx_queue(dev, raw_smp_processor_id());
>> + qp = per_cpu_ptr(p->q, raw_smp_processor_id());
>
> qp = this_cpu_ptr(dev->ifb_qp); is faster
and shorter, thanks. :)
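i.e. something like:

-	qp = per_cpu_ptr(p->q, raw_smp_processor_id());
+	qp = this_cpu_ptr(p->q);

(safe here since tasklets run in softirq context with preemption
disabled, so we cannot migrate between the lookup and the use.)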
>
>> + if (!__netif_tx_trylock(txq)) {
>> + tasklet_schedule(&qp->ifb_tasklet);
>> + return;
>> }
>> + skb_queue_splice_tail_init(&qp->rq, &tq);
>> + if (netif_tx_queue_stopped(txq))
>> + netif_tx_wake_queue(txq);
>> + __netif_tx_unlock(txq);
>>
>> - while ((skb = skb_dequeue(&dp->tq)) != NULL) {
>> + while ((skb = __skb_dequeue(&tq)) != NULL) {
>> u32 from = G_TC_FROM(skb->tc_verd);
>>
>> skb->tc_verd = 0;
>> skb->tc_verd = SET_TC_NCLS(skb->tc_verd);
>> - stats->tx_packets++;
>> - stats->tx_bytes += skb->len;
>> + u64_stats_update_begin(&qp->syncp);
>> + txq->tx_packets++;
>> + txq->tx_bytes += skb->len;
>>
>> rcu_read_lock();
>> skb->dev = dev_get_by_index_rcu(&init_net, skb->skb_iif);
>> if (!skb->dev) {
>> rcu_read_unlock();
>> + txq->tx_dropped++;
>> + u64_stats_update_end(&qp->syncp);
>> dev_kfree_skb(skb);
>> - stats->tx_dropped++;
>> - if (skb_queue_len(&dp->tq) != 0)
>> - goto resched;
>> - break;
>> + continue;
>> }
>> rcu_read_unlock();
>> - skb->skb_iif = _dev->ifindex;
>> + u64_stats_update_end(&qp->syncp);
>> + skb->skb_iif = dev->ifindex;
>
> Why is this necessary ? shouldnt skb->skb_iif already be dev->ifindex ?
No. In fact, act_mirred uses skb_iif to save the original dev:
skb2->skb_iif = skb->dev->ifindex;
skb2->dev = dev;
>
>>
>> if (from & AT_EGRESS) {
>> dev_queue_xmit(skb);
>> @@ -96,48 +103,32 @@ static void ri_tasklet(unsigned long dev)
>> } else
>> BUG();
>> }
>> -
>> - if (__netif_tx_trylock(txq)) {
>> - skb = skb_peek(&dp->rq);
>> - if (skb == NULL) {
>> - dp->tasklet_pending = 0;
>> - if (netif_queue_stopped(_dev))
>> - netif_wake_queue(_dev);
>> - } else {
>> - __netif_tx_unlock(txq);
>> - goto resched;
>> - }
>> - __netif_tx_unlock(txq);
>> - } else {
>> -resched:
>> - dp->tasklet_pending = 1;
>> - tasklet_schedule(&dp->ifb_tasklet);
>> - }
>> -
>> }
>>
>> static netdev_tx_t ifb_xmit(struct sk_buff *skb, struct net_device *dev)
>> {
>> - struct ifb_private *dp = netdev_priv(dev);
>> - struct net_device_stats *stats = &dev->stats;
>> + struct ifb_private *p = netdev_priv(dev);
>> + struct ifb_q_private *qp = per_cpu_ptr(p->q,
>> + skb_get_queue_mapping(skb));
>
> Would be good to add a
>
> WARN_ON(skb_get_queue_mapping(skb) != smp_processor_id());
I think it may be what Jamal wants too. Thanks.
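Something like this at the top of ifb_xmit() then (sketch; it assumes
the queue mapping is always the xmitting cpu, which is what the
per-cpu queue selection intends):

	/* each cpu owns exactly one tx queue, so the mapping must match */
	WARN_ON(skb_get_queue_mapping(skb) != smp_processor_id());
	qp = per_cpu_ptr(p->q, skb_get_queue_mapping(skb));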
>
>
>> u32 from = G_TC_FROM(skb->tc_verd);
>>
>> - stats->rx_packets++;
>> - stats->rx_bytes += skb->len;
>> + u64_stats_update_begin(&qp->syncp);
>> + qp->rx_packets++;
>> + qp->rx_bytes += skb->len;
>>
>> if (!(from & (AT_INGRESS|AT_EGRESS)) || !skb->skb_iif) {
>> + qp->rx_dropped++;
>> + u64_stats_update_end(&qp->syncp);
>> dev_kfree_skb(skb);
>> - stats->rx_dropped++;
>> return NETDEV_TX_OK;
>> }
>> + u64_stats_update_end(&qp->syncp);
>>
>> - __skb_queue_tail(&dp->rq, skb);
>> - if (!dp->tasklet_pending) {
>> - dp->tasklet_pending = 1;
>> - tasklet_schedule(&dp->ifb_tasklet);
>> - }
>> + __skb_queue_tail(&qp->rq, skb);
>> + if (skb_queue_len(&qp->rq) == 1)
>> + tasklet_schedule(&qp->ifb_tasklet);
>>
>> - if (skb_queue_len(&dp->rq) >= dev->tx_queue_len)
>> + if (skb_queue_len(&qp->rq) >= dev->tx_queue_len)
>
> This seems wrong...
> You need to change to netif_tx_stop_queue(txq)
Thanks for catching this. I missed it.
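i.e. something like (sketch):

	struct netdev_queue *txq;

	txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
	if (skb_queue_len(&qp->rq) >= dev->tx_queue_len)
		netif_tx_stop_queue(txq);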
>
>> netif_stop_queue(dev);
>>
>> return NETDEV_TX_OK;
>> @@ -145,33 +136,103 @@ static netdev_tx_t ifb_xmit(struct sk_buff *skb, struct net_device *dev)
>>
>> static int ifb_close(struct net_device *dev)
>> {
>> - struct ifb_private *dp = netdev_priv(dev);
>> -
>> - tasklet_kill(&dp->ifb_tasklet);
>> - netif_stop_queue(dev);
>> - __skb_queue_purge(&dp->rq);
>> - __skb_queue_purge(&dp->tq);
>> + struct ifb_private *p = netdev_priv(dev);
>> + struct ifb_q_private *qp;
>> + int cpu;
>> +
>> + for_each_possible_cpu(cpu) {
>> + qp = per_cpu_ptr(p->q, cpu);
>> + tasklet_kill(&qp->ifb_tasklet);
>> + netif_tx_stop_queue(netdev_get_tx_queue(dev, cpu));
>> + __skb_queue_purge(&qp->rq);
>> + }
>>
>> return 0;
>> }
>>
>> static int ifb_open(struct net_device *dev)
>> {
>> - struct ifb_private *dp = netdev_priv(dev);
>> + int cpu;
>> +
>
>
>
>> + for_each_possible_cpu(cpu)
>> + netif_tx_start_queue(netdev_get_tx_queue(dev, cpu));
>> +
>> + return 0;
>> +}
>> +
>> +static int ifb_init(struct net_device *dev)
>> +{
>> + struct ifb_private *p = netdev_priv(dev);
>> + struct ifb_q_private *q;
>> + int cpu;
>>
>> - tasklet_init(&dp->ifb_tasklet, ri_tasklet, (unsigned long)dev);
>> - __skb_queue_head_init(&dp->rq);
>> - __skb_queue_head_init(&dp->tq);
>> - netif_start_queue(dev);
>> + p->q = alloc_percpu(struct ifb_q_private);
>> + if (!p->q)
>> + return -ENOMEM;
>
> Hmm, maybe use netdev_queue_numa_node_write() somewhere, so that
> qdisc_alloc() can use NUMA affinities.
I'll add it, thanks.
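e.g. (untested; cpu_to_node() gives the home node for each cpu's tx
queue):

	for_each_possible_cpu(cpu)
		netdev_queue_numa_node_write(netdev_get_tx_queue(dev, cpu),
					     cpu_to_node(cpu));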
--
Regards,
Changli Gao(xiaosuo@...il.com)