Message-Id: <1271927357-2973-1-git-send-email-xiaosuo@gmail.com>
Date: Thu, 22 Apr 2010 17:09:17 +0800
From: Changli Gao <xiaosuo@...il.com>
To: "David S. Miller" <davem@...emloft.net>
Cc: jamal <hadi@...erus.ca>, Tom Herbert <therbert@...gle.com>,
	Eric Dumazet <eric.dumazet@...il.com>, netdev@...r.kernel.org,
	Changli Gao <xiaosuo@...il.com>
Subject: [PATCH v5] net: batch skb dequeueing from softnet input_pkt_queue

batch skb dequeueing from softnet input_pkt_queue to reduce potential
lock contention when RPS is enabled.

input_pkt_queue is reimplemented as a singly linked list (FIFO) to keep
enqueueing and dequeueing as fast as possible, and input_pkt_queue_lock
is moved into the RPS section to save 4 bytes on 32-bit machines.

Note: input_pkt_queue_len is not decreased until process_backlog()
returns.

Signed-off-by: Changli Gao <xiaosuo@...il.com>
---
 include/linux/netdevice.h |   12 ++++-
 net/core/dev.c            |   99 +++++++++++++++++++++++++++++++++-------------
 2 files changed, 82 insertions(+), 29 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 3c5ed5f..58abdd5 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1387,6 +1387,7 @@ struct softnet_data {
 	struct Qdisc		*output_queue;
 	struct list_head	poll_list;
 	struct sk_buff		*completion_queue;
+	struct sk_buff		*process_queue;
 
 #ifdef CONFIG_RPS
 	struct softnet_data	*rps_ipi_list;
@@ -1396,15 +1397,20 @@ struct softnet_data {
 	struct softnet_data	*rps_ipi_next;
 	unsigned int		cpu;
 	unsigned int		input_queue_head;
+	spinlock_t		input_pkt_queue_lock;
 #endif
-	struct sk_buff_head	input_pkt_queue;
+	unsigned int		input_pkt_queue_len;
+	struct sk_buff		*input_pkt_queue_head;
+	struct sk_buff		**input_pkt_queue_tailp;
+
 	struct napi_struct	backlog;
 };
 
-static inline void input_queue_head_incr(struct softnet_data *sd)
+static inline void input_queue_head_add(struct softnet_data *sd,
+					unsigned int len)
 {
 #ifdef CONFIG_RPS
-	sd->input_queue_head++;
+	sd->input_queue_head += len;
 #endif
 }
 
diff --git a/net/core/dev.c b/net/core/dev.c
index e904c47..f37c223 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -211,14 +211,14 @@ static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
 static inline void rps_lock(struct softnet_data *sd)
 {
 #ifdef CONFIG_RPS
-	spin_lock(&sd->input_pkt_queue.lock);
+	spin_lock(&sd->input_pkt_queue_lock);
 #endif
 }
 
 static inline void rps_unlock(struct softnet_data *sd)
 {
 #ifdef CONFIG_RPS
-	spin_unlock(&sd->input_pkt_queue.lock);
+	spin_unlock(&sd->input_pkt_queue_lock);
 #endif
 }
 
@@ -2409,12 +2409,15 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
 	__get_cpu_var(netdev_rx_stat).total++;
 
 	rps_lock(sd);
-	if (sd->input_pkt_queue.qlen <= netdev_max_backlog) {
-		if (sd->input_pkt_queue.qlen) {
+	if (sd->input_pkt_queue_len <= netdev_max_backlog) {
+		if (sd->input_pkt_queue_len) {
 enqueue:
-			__skb_queue_tail(&sd->input_pkt_queue, skb);
+			skb->next = NULL;
+			*sd->input_pkt_queue_tailp = skb;
+			sd->input_pkt_queue_tailp = &skb->next;
+			sd->input_pkt_queue_len++;
 #ifdef CONFIG_RPS
-			*qtail = sd->input_queue_head + sd->input_pkt_queue.qlen;
+			*qtail = sd->input_queue_head + sd->input_pkt_queue_len;
 #endif
 			rps_unlock(sd);
 			local_irq_restore(flags);
@@ -2927,19 +2930,37 @@ EXPORT_SYMBOL(netif_receive_skb);
 
 /* Network device is going away, flush any packets still pending
  * Called with irqs disabled.
  */
-static void flush_backlog(void *arg)
+
+static struct sk_buff **__flush_backlog(struct softnet_data *sd,
+					struct sk_buff **pskb,
+					struct net_device *dev)
 {
-	struct net_device *dev = arg;
-	struct softnet_data *sd = &__get_cpu_var(softnet_data);
-	struct sk_buff *skb, *tmp;
+	struct sk_buff *skb;
 
-	rps_lock(sd);
-	skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp)
+	while (*pskb) {
+		skb = *pskb;
 		if (skb->dev == dev) {
-			__skb_unlink(skb, &sd->input_pkt_queue);
+			*pskb = skb->next;
 			kfree_skb(skb);
-			input_queue_head_incr(sd);
+			input_queue_head_add(sd, 1);
+			sd->input_pkt_queue_len--;
+		} else {
+			pskb = &skb->next;
 		}
+	}
+
+	return pskb;
+}
+
+static void flush_backlog(void *arg)
+{
+	struct softnet_data *sd = &__get_cpu_var(softnet_data);
+	struct sk_buff **tailp;
+
+	rps_lock(sd);
+	tailp = __flush_backlog(sd, &sd->input_pkt_queue_head, arg);
+	sd->input_pkt_queue_tailp = tailp;
+	__flush_backlog(sd, &sd->process_queue, arg);
 	rps_unlock(sd);
 }
 
@@ -3249,24 +3270,39 @@ static int process_backlog(struct napi_struct *napi, int quota)
 	struct softnet_data *sd = &__get_cpu_var(softnet_data);
 
 	napi->weight = weight_p;
+	local_irq_disable();
 	do {
 		struct sk_buff *skb;
 
-		local_irq_disable();
+		while (sd->process_queue) {
+			skb = sd->process_queue;
+			sd->process_queue = skb->next;
+			local_irq_enable();
+			__netif_receive_skb(skb);
+			if (++work >= quota) {
+				local_irq_disable();
+				rps_lock(sd);
+				goto out;
+			}
+			local_irq_disable();
+		}
+
 		rps_lock(sd);
-		skb = __skb_dequeue(&sd->input_pkt_queue);
-		if (!skb) {
+		if (sd->input_pkt_queue_head == NULL) {
 			__napi_complete(napi);
-			rps_unlock(sd);
-			local_irq_enable();
 			break;
 		}
-		input_queue_head_incr(sd);
+		sd->process_queue = sd->input_pkt_queue_head;
+		sd->input_pkt_queue_head = NULL;
+		sd->input_pkt_queue_tailp = &sd->input_pkt_queue_head;
 		rps_unlock(sd);
-		local_irq_enable();
+	} while (1);
 
-		__netif_receive_skb(skb);
-	} while (++work < quota);
+out:
+	sd->input_pkt_queue_len -= work;
+	input_queue_head_add(sd, work);
+	rps_unlock(sd);
+	local_irq_enable();
 
 	return work;
 }
@@ -5621,10 +5657,17 @@ static int dev_cpu_callback(struct notifier_block *nfb,
 	local_irq_enable();
 
 	/* Process offline CPU's input_pkt_queue */
-	while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) {
+	while ((skb = oldsd->input_pkt_queue_head)) {
+		oldsd->input_pkt_queue_head = skb->next;
+		netif_rx(skb);
+	}
+	while ((skb = oldsd->process_queue)) {
+		oldsd->process_queue = skb->next;
 		netif_rx(skb);
-		input_queue_head_incr(oldsd);
 	}
+	oldsd->input_pkt_queue_tailp = &oldsd->input_pkt_queue_head;
+	input_queue_head_add(oldsd, oldsd->input_pkt_queue_len);
+	oldsd->input_pkt_queue_len = 0;
 
 	return NOTIFY_OK;
 }
@@ -5842,11 +5885,15 @@ static int __init net_dev_init(void)
 	for_each_possible_cpu(i) {
 		struct softnet_data *sd = &per_cpu(softnet_data, i);
 
-		skb_queue_head_init(&sd->input_pkt_queue);
+		sd->input_pkt_queue_head = NULL;
+		sd->input_pkt_queue_tailp = &sd->input_pkt_queue_head;
+		sd->input_pkt_queue_len = 0;
+		sd->process_queue = NULL;
 		sd->completion_queue = NULL;
 		INIT_LIST_HEAD(&sd->poll_list);
 
 #ifdef CONFIG_RPS
+		spin_lock_init(&sd->input_pkt_queue_lock);
 		sd->csd.func = rps_trigger_softirq;
 		sd->csd.info = sd;
 		sd->csd.flags = 0;
--
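
The queue representation above is the classic head-plus-tail-pointer-pointer
singly linked FIFO. The following minimal userspace sketch (illustrative
names, not the kernel's; struct pkt stands in for struct sk_buff) shows the
two moves the patch relies on: the O(1) tail enqueue done in
enqueue_to_backlog(), and the O(1) splice of the whole pending list into
process_queue that lets process_backlog() take input_pkt_queue_lock once per
batch instead of once per packet:

	#include <stdio.h>
	#include <stdlib.h>

	/* Illustrative stand-in for struct sk_buff: only the link matters. */
	struct pkt {
		struct pkt *next;
		int id;
	};

	/* FIFO kept as a head pointer plus a pointer to the tail's next
	 * field, mirroring input_pkt_queue_head/input_pkt_queue_tailp. */
	struct pkt_queue {
		struct pkt *head;
		struct pkt **tailp;
		unsigned int len;
	};

	static void pkt_queue_init(struct pkt_queue *q)
	{
		q->head = NULL;
		q->tailp = &q->head;	/* empty: tailp points at head */
		q->len = 0;
	}

	/* O(1) enqueue, the same steps enqueue_to_backlog() performs. */
	static void pkt_queue_enqueue(struct pkt_queue *q, struct pkt *p)
	{
		p->next = NULL;
		*q->tailp = p;		/* link after current tail (or set head) */
		q->tailp = &p->next;	/* new tail is this packet */
		q->len++;
	}

	/* O(1) splice: hand the whole list to the caller and reset the
	 * queue. len is deliberately left for the caller to settle later,
	 * mirroring the patch's note that input_pkt_queue_len is only
	 * decreased when process_backlog() returns. */
	static struct pkt *pkt_queue_splice_all(struct pkt_queue *q)
	{
		struct pkt *batch = q->head;

		q->head = NULL;
		q->tailp = &q->head;
		return batch;
	}

	int main(void)
	{
		struct pkt_queue q;
		struct pkt *batch, *p;
		int i;

		pkt_queue_init(&q);
		for (i = 0; i < 4; i++) {
			p = malloc(sizeof(*p));
			p->id = i;
			pkt_queue_enqueue(&q, p);
		}

		/* Drain the batch without touching the queue again. */
		for (batch = pkt_queue_splice_all(&q); batch; batch = p) {
			p = batch->next;
			printf("pkt %d\n", batch->id);
			free(batch);
		}
		return 0;
	}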
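The __flush_backlog() rewrite uses the related pointer-to-pointer walk: by
advancing a struct sk_buff ** through the next fields, it can unlink matching
packets without tracking a previous node, and whatever address the walk ends
on is, by construction, the queue's new tail pointer. A standalone sketch of
that pattern, again with made-up names:

	#include <stdio.h>
	#include <stdlib.h>

	struct pkt {			/* same illustrative node as above */
		struct pkt *next;
		int id;
	};

	/* Drop every node whose id matches victim; return the address of
	 * the final next field, which is the list's new tail pointer --
	 * the same contract __flush_backlog() has in the patch. */
	static struct pkt **flush_matching(struct pkt **pskb, int victim,
					   unsigned int *len)
	{
		struct pkt *p;

		while (*pskb) {
			p = *pskb;
			if (p->id == victim) {
				*pskb = p->next;  /* unlink, no prev needed */
				free(p);
				(*len)--;
			} else {
				pskb = &p->next;  /* step to next link field */
			}
		}
		return pskb;
	}

	int main(void)
	{
		struct pkt *head = NULL, **tailp = &head, *p;
		unsigned int len = 0;
		int ids[] = { 1, 2, 1, 3 }, i;

		for (i = 0; i < 4; i++) {	/* build the list 1,2,1,3 */
			p = malloc(sizeof(*p));
			p->id = ids[i];
			p->next = NULL;
			*tailp = p;
			tailp = &p->next;
			len++;
		}

		tailp = flush_matching(&head, 1, &len);	/* drop both 1s */
		for (p = head; p; p = p->next)
			printf("kept %d\n", p->id);
		printf("len=%u, tail %s\n", len,
		       *tailp == NULL ? "valid for re-enqueue" : "broken");

		while (head) {			/* cleanup */
			p = head->next;
			free(head);
			head = p;
		}
		return 0;
	}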