[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <4AF92D6D.8060300@gmail.com>
Date: Tue, 10 Nov 2009 10:07:57 +0100
From: Eric Dumazet <eric.dumazet@...il.com>
To: xiaosuo@...il.com
CC: "David S. Miller" <davem@...emloft.net>, netdev@...r.kernel.org,
Tom Herbert <therbert@...gle.com>
Subject: Re: [PATCH] ifb: add multi-queue support
Changli Gao a écrit :
> ifb: add multi-queue support
>
> Add multi-queue support; one kernel thread is created per queue.
> It can be used to emulate a multi-queue NIC in software, and to distribute
> work among CPUs.
> gentux linux # modprobe ifb numtxqs=2
> gentux linux # ifconfig ifb0 up
> gentux linux # pgrep ifb0
> 18508
> 18509
> gentux linux # taskset -p 1 18508
> pid 18508's current affinity mask: 3
> pid 18508's new affinity mask: 1
> gentux linux # taskset -p 2 18509
> pid 18509's current affinity mask: 3
> pid 18509's new affinity mask: 2
> gentux linux # tc qdisc add dev br0 ingress
> gentux linux # tc filter add dev br0 parent ffff: protocol ip basic
> action mirred egress redirect dev ifb0
Seems pretty cool !
I have some comments
>
> Signed-off-by: Changli Gao <xiaosuo@...il.com>
> ----
> drivers/net/ifb.c | 309
> ++++++++++++++++++++++++++++++++----------------------
> 1 file changed, 186 insertions(+), 123 deletions(-)
>
> diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c
> index 030913f..6e04188 100644
> --- a/drivers/net/ifb.c
> +++ b/drivers/net/ifb.c
> @@ -33,139 +33,101 @@
> #include <linux/etherdevice.h>
> #include <linux/init.h>
> #include <linux/moduleparam.h>
> +#include <linux/wait.h>
> +#include <linux/sched.h>
> +#include <linux/kthread.h>
> +#include <linux/ip.h>
> +#include <linux/ipv6.h>
> +#include <net/ip.h>
> #include <net/pkt_sched.h>
> #include <net/net_namespace.h>
>
> -#define TX_TIMEOUT (2*HZ)
> -
> #define TX_Q_LIMIT 32
> +
> struct ifb_private {
> - struct tasklet_struct ifb_tasklet;
> - int tasklet_pending;
> - /* mostly debug stats leave in for now */
> - unsigned long st_task_enter; /* tasklet entered */
> - unsigned long st_txq_refl_try; /* transmit queue refill attempt */
> - unsigned long st_rxq_enter; /* receive queue entered */
> - unsigned long st_rx2tx_tran; /* receive to trasmit transfers */
> - unsigned long st_rxq_notenter; /*receiveQ not entered, resched */
> - unsigned long st_rx_frm_egr; /* received from egress path */
> - unsigned long st_rx_frm_ing; /* received from ingress path */
> - unsigned long st_rxq_check;
> - unsigned long st_rxq_rsch;
> - struct sk_buff_head rq;
> - struct sk_buff_head tq;
> + struct net_device *dev;
> + struct sk_buff_head rq;
> + struct sk_buff_head tq;
> + wait_queue_head_t wq;
> + struct task_struct *task;
> };
Maybe you should allocate a true per-cpu structure, to avoid cache line sharing
and get appropriate NUMA properties.
At least use __cacheline_aligned_in_smp ...
>
> +/* Number of ifb devices to be set up by this module. */
> static int numifbs = 2;
> +module_param(numifbs, int, 0444);
> +MODULE_PARM_DESC(numifbs, "Number of ifb devices");
>
> -static void ri_tasklet(unsigned long dev);
> -static netdev_tx_t ifb_xmit(struct sk_buff *skb, struct net_device *dev);
> -static int ifb_open(struct net_device *dev);
> -static int ifb_close(struct net_device *dev);
> +/* Number of TX queues per ifb */
> +static int numtxqs = 1;
> +module_param(numtxqs, int, 0444);
> +MODULE_PARM_DESC(numtxqs, "Number of TX queues per ifb");
>
> -static void ri_tasklet(unsigned long dev)
> +static int ifb_thread(void *priv)
> {
> -
> - struct net_device *_dev = (struct net_device *)dev;
> - struct ifb_private *dp = netdev_priv(_dev);
> - struct net_device_stats *stats = &_dev->stats;
> - struct netdev_queue *txq;
> + struct ifb_private *dp = (struct ifb_private*)priv;
A space is required before * : (struct ifb_private *)priv;
(in many places in your patch)
> + struct net_device *dev = dp->dev;
> + struct net_device_stats *stats = &dev->stats;
Here you use a net_device_stats that is shared by all your queues,
so your updates won't be protected and some will be lost.
You should use the txq->tx_ counters instead.
> + unsigned int num = dp - (struct ifb_private*)netdev_priv(dev);
> + struct netdev_queue *txq = netdev_get_tx_queue(dev, num);
> struct sk_buff *skb;
> -
> - txq = netdev_get_tx_queue(_dev, 0);
> - dp->st_task_enter++;
> - if ((skb = skb_peek(&dp->tq)) == NULL) {
> - dp->st_txq_refl_try++;
> - if (__netif_tx_trylock(txq)) {
> - dp->st_rxq_enter++;
> - while ((skb = skb_dequeue(&dp->rq)) != NULL) {
> + DEFINE_WAIT(wait);
> +
> + while (1) {
> + /* move skb from rq to tq */
> + while (1) {
> + prepare_to_wait(&dp->wq, &wait, TASK_UNINTERRUPTIBLE);
> + while (!__netif_tx_trylock(txq))
> + yield();
> + while ((skb = skb_dequeue(&dp->rq)) != NULL)
> skb_queue_tail(&dp->tq, skb);
> - dp->st_rx2tx_tran++;
> - }
> + if (netif_queue_stopped(dev))
> + netif_wake_queue(dev);
> __netif_tx_unlock(txq);
> - } else {
> - /* reschedule */
> - dp->st_rxq_notenter++;
> - goto resched;
> + if (kthread_should_stop() || !skb_queue_empty(&dp->tq))
> + break;
> + schedule();
> }
> - }
> -
> - while ((skb = skb_dequeue(&dp->tq)) != NULL) {
> - u32 from = G_TC_FROM(skb->tc_verd);
> -
> - skb->tc_verd = 0;
> - skb->tc_verd = SET_TC_NCLS(skb->tc_verd);
> - stats->tx_packets++;
> - stats->tx_bytes +=skb->len;
> -
> - skb->dev = dev_get_by_index(&init_net, skb->iif);
> - if (!skb->dev) {
> - dev_kfree_skb(skb);
> - stats->tx_dropped++;
> + finish_wait(&dp->wq, &wait);
> + if (kthread_should_stop())
> break;
> - }
> - dev_put(skb->dev);
> - skb->iif = _dev->ifindex;
> -
> - if (from & AT_EGRESS) {
> - dp->st_rx_frm_egr++;
> - dev_queue_xmit(skb);
> - } else if (from & AT_INGRESS) {
> - dp->st_rx_frm_ing++;
> - skb_pull(skb, skb->dev->hard_header_len);
> - netif_rx(skb);
> - } else
> - BUG();
> - }
>
> - if (__netif_tx_trylock(txq)) {
> - dp->st_rxq_check++;
> - if ((skb = skb_peek(&dp->rq)) == NULL) {
> - dp->tasklet_pending = 0;
> - if (netif_queue_stopped(_dev))
> - netif_wake_queue(_dev);
> - } else {
> - dp->st_rxq_rsch++;
> - __netif_tx_unlock(txq);
> - goto resched;
> + /* transfer packets */
> + while ((skb = skb_dequeue(&dp->tq)) != NULL) {
> + u32 from = G_TC_FROM(skb->tc_verd);
> +
> + skb->tc_verd = 0;
> + skb->tc_verd = SET_TC_NCLS(skb->tc_verd);
> + stats->tx_packets++;
> + stats->tx_bytes +=skb->len;
Use : txq->tx_packets++
txq->tx_bytes += skb->len;
> +
> + skb->dev = dev_get_by_index(&init_net, skb->iif);
> + if (!skb->dev) {
> + dev_kfree_skb(skb);
> + stats->tx_dropped++;
txq->tx_dropped ?
> + break;
> + }
> + dev_put(skb->dev);
> + skb->iif = dev->ifindex;
> +
> + if (from & AT_EGRESS) {
> + dev_queue_xmit(skb);
> + } else if (from & AT_INGRESS) {
> + skb_pull(skb, skb->dev->hard_header_len);
> + netif_rx_ni(skb);
> + } else
> + BUG();
> }
> - __netif_tx_unlock(txq);
> - } else {
> -resched:
> - dp->tasklet_pending = 1;
> - tasklet_schedule(&dp->ifb_tasklet);
> }
>
> -}
> -
> -static const struct net_device_ops ifb_netdev_ops = {
> - .ndo_open = ifb_open,
> - .ndo_stop = ifb_close,
> - .ndo_start_xmit = ifb_xmit,
> - .ndo_validate_addr = eth_validate_addr,
> -};
> -
> -static void ifb_setup(struct net_device *dev)
> -{
> - /* Initialize the device structure. */
> - dev->destructor = free_netdev;
> - dev->netdev_ops = &ifb_netdev_ops;
> -
> - /* Fill in device structure with ethernet-generic values. */
> - ether_setup(dev);
> - dev->tx_queue_len = TX_Q_LIMIT;
> -
> - dev->flags |= IFF_NOARP;
> - dev->flags &= ~IFF_MULTICAST;
> - dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
> - random_ether_addr(dev->dev_addr);
> + return 0;
> }
>
> static netdev_tx_t ifb_xmit(struct sk_buff *skb, struct net_device *dev)
> {
> - struct ifb_private *dp = netdev_priv(dev);
> struct net_device_stats *stats = &dev->stats;
> u32 from = G_TC_FROM(skb->tc_verd);
> + int num = skb_get_queue_mapping(skb);
> + struct ifb_private *dp = ((struct ifb_private*)netdev_priv(dev)) + num;
>
> stats->rx_packets++;
> stats->rx_bytes+=skb->len;
Not sure how to solve this problem (several CPUs can update these counters in parallel).
Thanks
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists