[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <1271891149.7895.3751.camel@edumazet-laptop>
Date: Thu, 22 Apr 2010 01:05:49 +0200
From: Eric Dumazet <eric.dumazet@...il.com>
To: Changli Gao <xiaosuo@...il.com>
Cc: "David S. Miller" <davem@...emloft.net>, netdev@...r.kernel.org,
Tom Herbert <therbert@...gle.com>, jamal <hadi@...erus.ca>
Subject: Re: [PATCH v3] net: batch skb dequeueing from softnet
input_pkt_queue
Le mercredi 14 avril 2010 à 17:52 +0800, Changli Gao a écrit :
> batch skb dequeueing from softnet input_pkt_queue
>
> batch skb dequeueing from softnet input_pkt_queue to reduce potential lock
> contention and irq disabling/enabling.
>
> Signed-off-by: Changli Gao <xiaosuo@...il.com>
> ----
lock contention _is_ a problem, Jamal's tests can show it.
irq disabling/enabling is not, and it forces the use of the stop_machine() killer.
I suggest something very simple, like a small buffer (16 slots), so that
process_backlog() can batch 16 buffers at once.
The following patch is not tested, but it's late here and I need to sleep ;)
This is a RFC, not for inclusion, and based on current net-next-2.6 tree
[RFC] net: introduce a batch mode in process_backlog()
We see a lock contention on input_pkt_queue.lock in RPS benches.
As suggested by Changli Gao, we can batch several skbs at once in
process_backlog(), so that we dirty input_pkt_queue less often.
I chose to batch at most 16 skbs per round, and place them in
softnet_data zone where flush_backlog() can find them and eventually
free these skbs at device dismantle.
Signed-off-by: Eric Dumazet <eric.dumazet@...il.com>
---
include/linux/netdevice.h | 2 +
net/core/dev.c | 48 +++++++++++++++++++++++++++---------
2 files changed, 38 insertions(+), 12 deletions(-)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 3c5ed5f..16da8db 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1383,11 +1383,13 @@ static inline int unregister_gifconf(unsigned int family)
/*
* Incoming packets are placed on per-cpu queues
*/
+#define SD_BATCH_SZ 16
struct softnet_data {
struct Qdisc *output_queue;
struct list_head poll_list;
struct sk_buff *completion_queue;
+ struct sk_buff *batch[SD_BATCH_SZ]; /* process_backlog() & flush_backlog() */
#ifdef CONFIG_RPS
struct softnet_data *rps_ipi_list;
diff --git a/net/core/dev.c b/net/core/dev.c
index e904c47..2673ce0 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2932,6 +2932,7 @@ static void flush_backlog(void *arg)
struct net_device *dev = arg;
struct softnet_data *sd = &__get_cpu_var(softnet_data);
struct sk_buff *skb, *tmp;
+ int i;
rps_lock(sd);
skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp)
@@ -2941,6 +2942,13 @@ static void flush_backlog(void *arg)
input_queue_head_incr(sd);
}
rps_unlock(sd);
+ for (i = 0; i < ARRAY_SIZE(sd->batch); i++) {
+ skb = sd->batch[i];
+ if (skb && skb->dev == dev) {
+ kfree_skb(skb);
+ sd->batch[i] = NULL;
+ }
+ }
}
static int napi_gro_complete(struct sk_buff *skb)
@@ -3245,29 +3253,47 @@ EXPORT_SYMBOL(napi_gro_frags);
static int process_backlog(struct napi_struct *napi, int quota)
{
- int work = 0;
+ int i, n, lim, work = 0;
struct softnet_data *sd = &__get_cpu_var(softnet_data);
+ struct sk_buff *skb;
napi->weight = weight_p;
+ local_irq_disable();
+
do {
- struct sk_buff *skb;
+ lim = quota - work;
+ if (lim > ARRAY_SIZE(sd->batch))
+ lim = ARRAY_SIZE(sd->batch);
+ /* batch at most 16 buffers */
- local_irq_disable();
rps_lock(sd);
- skb = __skb_dequeue(&sd->input_pkt_queue);
- if (!skb) {
+ for (n = 0; n < lim; n++) {
+ sd->batch[n] = __skb_dequeue(&sd->input_pkt_queue);
+ if (!sd->batch[n])
+ break;
+ }
+ if (!sd->input_pkt_queue.qlen) {
__napi_complete(napi);
- rps_unlock(sd);
- local_irq_enable();
- break;
+ quota = 0;
}
- input_queue_head_incr(sd);
rps_unlock(sd);
- local_irq_enable();
- __netif_receive_skb(skb);
- } while (++work < quota);
+ /* Now process our batch */
+ for (i = 0; i < n; i++) {
+ skb = sd->batch[i];
+ /* flush_backlog() might have stolen this skb */
+ input_queue_head_incr(sd);
+ if (likely(skb)) {
+ sd->batch[i] = NULL;
+ local_irq_enable();
+ __netif_receive_skb(skb);
+ local_irq_disable();
+ }
+ }
+ work += n;
+ } while (work < quota);
+ local_irq_enable();
return work;
}
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists