[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <1271936227.7895.5285.camel@edumazet-laptop>
Date: Thu, 22 Apr 2010 13:37:07 +0200
From: Eric Dumazet <eric.dumazet@...il.com>
To: Changli Gao <xiaosuo@...il.com>
Cc: "David S. Miller" <davem@...emloft.net>, jamal <hadi@...erus.ca>,
Tom Herbert <therbert@...gle.com>, netdev@...r.kernel.org
Subject: Re: [PATCH v5] net: batch skb dequeueing from softnet
input_pkt_queue
Le jeudi 22 avril 2010 à 17:09 +0800, Changli Gao a écrit :
> batch skb dequeueing from softnet input_pkt_queue
>
> Batch skb dequeueing from softnet input_pkt_queue to reduce potential lock
> contention when RPS is enabled. input_pkt_queue is reimplemented as a singly
> linked list (FIFO) to keep enqueueing and dequeueing as fast as possible, and
> input_pkt_queue_lock is moved into the CONFIG_RPS section to save 4 bytes on
> 32-bit machines.
>
> Note: input_pkt_queue_len is not decreased until process_backlog()
> returns.
>
> Signed-off-by: Changli Gao <xiaosuo@...il.com>
> ----
> include/linux/netdevice.h | 12 ++++-
> net/core/dev.c | 99 +++++++++++++++++++++++++++++++++-------------
> 2 files changed, 82 insertions(+), 29 deletions(-)
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index 3c5ed5f..58abdd5 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -1387,6 +1387,7 @@ struct softnet_data {
> struct Qdisc *output_queue;
> struct list_head poll_list;
> struct sk_buff *completion_queue;
> + struct sk_buff *process_queue;
>
> #ifdef CONFIG_RPS
> struct softnet_data *rps_ipi_list;
> @@ -1396,15 +1397,20 @@ struct softnet_data {
> struct softnet_data *rps_ipi_next;
> unsigned int cpu;
> unsigned int input_queue_head;
> + spinlock_t input_pkt_queue_lock;
> #endif
> - struct sk_buff_head input_pkt_queue;
> + unsigned int input_pkt_queue_len;
> + struct sk_buff *input_pkt_queue_head;
> + struct sk_buff **input_pkt_queue_tailp;
> +
> struct napi_struct backlog;
> };
>
> -static inline void input_queue_head_incr(struct softnet_data *sd)
> +static inline void input_queue_head_add(struct softnet_data *sd,
> + unsigned int len)
> {
> #ifdef CONFIG_RPS
> - sd->input_queue_head++;
> + sd->input_queue_head += len;
> #endif
> }
>
> diff --git a/net/core/dev.c b/net/core/dev.c
> index e904c47..f37c223 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -211,14 +211,14 @@ static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
> static inline void rps_lock(struct softnet_data *sd)
> {
> #ifdef CONFIG_RPS
> - spin_lock(&sd->input_pkt_queue.lock);
> + spin_lock(&sd->input_pkt_queue_lock);
> #endif
> }
>
> static inline void rps_unlock(struct softnet_data *sd)
> {
> #ifdef CONFIG_RPS
> - spin_unlock(&sd->input_pkt_queue.lock);
> + spin_unlock(&sd->input_pkt_queue_lock);
> #endif
> }
>
> @@ -2409,12 +2409,15 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
> __get_cpu_var(netdev_rx_stat).total++;
>
> rps_lock(sd);
> - if (sd->input_pkt_queue.qlen <= netdev_max_backlog) {
> - if (sd->input_pkt_queue.qlen) {
> + if (sd->input_pkt_queue_len <= netdev_max_backlog) {
> + if (sd->input_pkt_queue_len) {
> enqueue:
> - __skb_queue_tail(&sd->input_pkt_queue, skb);
> + skb->next = NULL;
> + *sd->input_pkt_queue_tailp = skb;
> + sd->input_pkt_queue_tailp = &skb->next;
> + sd->input_pkt_queue_len++;
> #ifdef CONFIG_RPS
> - *qtail = sd->input_queue_head + sd->input_pkt_queue.qlen;
> + *qtail = sd->input_queue_head + sd->input_pkt_queue_len;
> #endif
> rps_unlock(sd);
> local_irq_restore(flags);
> @@ -2927,19 +2930,37 @@ EXPORT_SYMBOL(netif_receive_skb);
> /* Network device is going away, flush any packets still pending
> * Called with irqs disabled.
> */
> -static void flush_backlog(void *arg)
> +
> +static struct sk_buff **__flush_backlog(struct softnet_data *sd,
> + struct sk_buff **pskb,
> + struct net_device *dev)
> {
> - struct net_device *dev = arg;
> - struct softnet_data *sd = &__get_cpu_var(softnet_data);
> - struct sk_buff *skb, *tmp;
> + struct sk_buff *skb;
>
> - rps_lock(sd);
> - skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp)
> + while (*pskb) {
> + skb = *pskb;
> if (skb->dev == dev) {
> - __skb_unlink(skb, &sd->input_pkt_queue);
> + *pskb = skb->next;
> kfree_skb(skb);
> - input_queue_head_incr(sd);
> + input_queue_head_add(sd, 1);
> + sd->input_pkt_queue_len--;
> + } else {
> + pskb = &skb->next;
> }
> + }
> +
> + return pskb;
> +}
> +
> +static void flush_backlog(void *arg)
> +{
> + struct softnet_data *sd = &__get_cpu_var(softnet_data);
> + struct sk_buff **tailp;
> +
> + rps_lock(sd);
> + tailp = __flush_backlog(sd, &sd->input_pkt_queue_head, arg);
> + sd->input_pkt_queue_tailp = tailp;
> + __flush_backlog(sd, &sd->process_queue, arg);
> rps_unlock(sd);
> }
>
> @@ -3249,24 +3270,39 @@ static int process_backlog(struct napi_struct *napi, int quota)
> struct softnet_data *sd = &__get_cpu_var(softnet_data);
>
> napi->weight = weight_p;
> + local_irq_disable();
> do {
> struct sk_buff *skb;
>
> - local_irq_disable();
> + while (sd->process_queue) {
> + skb = sd->process_queue;
> + sd->process_queue = skb->next;
> + local_irq_enable();
> + __netif_receive_skb(skb);
> + if (++work >= quota) {
> + local_irq_disable();
> + rps_lock(sd);
> + goto out;
> + }
> + local_irq_disable();
> + }
> +
> rps_lock(sd);
> - skb = __skb_dequeue(&sd->input_pkt_queue);
> - if (!skb) {
> + if (sd->input_pkt_queue_head == NULL) {
> __napi_complete(napi);
> - rps_unlock(sd);
> - local_irq_enable();
> break;
> }
> - input_queue_head_incr(sd);
> + sd->process_queue = sd->input_pkt_queue_head;
> + sd->input_pkt_queue_head = NULL;
> + sd->input_pkt_queue_tailp = &sd->input_pkt_queue_head;
> rps_unlock(sd);
> - local_irq_enable();
> + } while (1);
>
> - __netif_receive_skb(skb);
> - } while (++work < quota);
> +out:
> + sd->input_pkt_queue_len -= work;
> + input_queue_head_add(sd, work);
> + rps_unlock(sd);
> + local_irq_enable();
>
Please reorder things better.
Most likely this function is called for one packet.
In your version you take the rps_lock()/rps_unlock() path twice, so
it'll be slower:
once to transfer the input list to the process list, and
once to do the post-processing at the 'out:' label.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists