Message-ID: <1271936227.7895.5285.camel@edumazet-laptop>
Date:	Thu, 22 Apr 2010 13:37:07 +0200
From:	Eric Dumazet <eric.dumazet@...il.com>
To:	Changli Gao <xiaosuo@...il.com>
Cc:	"David S. Miller" <davem@...emloft.net>, jamal <hadi@...erus.ca>,
	Tom Herbert <therbert@...gle.com>, netdev@...r.kernel.org
Subject: Re: [PATCH v5] net: batch skb dequeueing from softnet
 input_pkt_queue

On Thursday, 22 April 2010 at 17:09 +0800, Changli Gao wrote:
> batch skb dequeueing from softnet input_pkt_queue
> 
> Batch skb dequeueing from the softnet input_pkt_queue to reduce potential lock
> contention when RPS is enabled. input_pkt_queue is reimplemented as a singly
> linked list (FIFO) to keep enqueueing and dequeueing as fast as possible, and
> input_pkt_queue_lock is moved into the RPS section, saving 4 bytes on 32-bit
> machines.
> 
> Note: input_pkt_queue_len is not decreased until process_backlog()
> returns.
> 
> Signed-off-by: Changli Gao <xiaosuo@...il.com>
> ----
>  include/linux/netdevice.h |   12 ++++-
>  net/core/dev.c            |   99 +++++++++++++++++++++++++++++++++-------------
>  2 files changed, 82 insertions(+), 29 deletions(-)
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index 3c5ed5f..58abdd5 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -1387,6 +1387,7 @@ struct softnet_data {
>  	struct Qdisc		*output_queue;
>  	struct list_head	poll_list;
>  	struct sk_buff		*completion_queue;
> +	struct sk_buff		*process_queue;
>  
>  #ifdef CONFIG_RPS
>  	struct softnet_data	*rps_ipi_list;
> @@ -1396,15 +1397,20 @@ struct softnet_data {
>  	struct softnet_data	*rps_ipi_next;
>  	unsigned int		cpu;
>  	unsigned int		input_queue_head;
> +	spinlock_t		input_pkt_queue_lock;
>  #endif
> -	struct sk_buff_head	input_pkt_queue;
> +	unsigned int		input_pkt_queue_len;
> +	struct sk_buff		*input_pkt_queue_head;
> +	struct sk_buff		**input_pkt_queue_tailp;
> +
>  	struct napi_struct	backlog;
>  };
>  
> -static inline void input_queue_head_incr(struct softnet_data *sd)
> +static inline void input_queue_head_add(struct softnet_data *sd,
> +					unsigned int len)
>  {
>  #ifdef CONFIG_RPS
> -	sd->input_queue_head++;
> +	sd->input_queue_head += len;
>  #endif
>  }
>  
> diff --git a/net/core/dev.c b/net/core/dev.c
> index e904c47..f37c223 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -211,14 +211,14 @@ static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
>  static inline void rps_lock(struct softnet_data *sd)
>  {
>  #ifdef CONFIG_RPS
> -	spin_lock(&sd->input_pkt_queue.lock);
> +	spin_lock(&sd->input_pkt_queue_lock);
>  #endif
>  }
>  
>  static inline void rps_unlock(struct softnet_data *sd)
>  {
>  #ifdef CONFIG_RPS
> -	spin_unlock(&sd->input_pkt_queue.lock);
> +	spin_unlock(&sd->input_pkt_queue_lock);
>  #endif
>  }
>  
> @@ -2409,12 +2409,15 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
>  	__get_cpu_var(netdev_rx_stat).total++;
>  
>  	rps_lock(sd);
> -	if (sd->input_pkt_queue.qlen <= netdev_max_backlog) {
> -		if (sd->input_pkt_queue.qlen) {
> +	if (sd->input_pkt_queue_len <= netdev_max_backlog) {
> +		if (sd->input_pkt_queue_len) {
>  enqueue:
> -			__skb_queue_tail(&sd->input_pkt_queue, skb);
> +			skb->next = NULL;
> +			*sd->input_pkt_queue_tailp = skb;
> +			sd->input_pkt_queue_tailp = &skb->next;
> +			sd->input_pkt_queue_len++;
>  #ifdef CONFIG_RPS
> -			*qtail = sd->input_queue_head + sd->input_pkt_queue.qlen;
> +			*qtail = sd->input_queue_head + sd->input_pkt_queue_len;
>  #endif
>  			rps_unlock(sd);
>  			local_irq_restore(flags);
> @@ -2927,19 +2930,37 @@ EXPORT_SYMBOL(netif_receive_skb);
>  /* Network device is going away, flush any packets still pending
>   * Called with irqs disabled.
>   */
> -static void flush_backlog(void *arg)
> +
> +static struct sk_buff **__flush_backlog(struct softnet_data *sd,
> +					struct sk_buff **pskb,
> +					struct net_device *dev)
>  {
> -	struct net_device *dev = arg;
> -	struct softnet_data *sd = &__get_cpu_var(softnet_data);
> -	struct sk_buff *skb, *tmp;
> +	struct sk_buff *skb;
>  
> -	rps_lock(sd);
> -	skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp)
> +	while (*pskb) {
> +		skb = *pskb;
>  		if (skb->dev == dev) {
> -			__skb_unlink(skb, &sd->input_pkt_queue);
> +			*pskb = skb->next;
>  			kfree_skb(skb);
> -			input_queue_head_incr(sd);
> +			input_queue_head_add(sd, 1);
> +			sd->input_pkt_queue_len--;
> +		} else {
> +			pskb = &skb->next;
>  		}
> +	}
> +
> +	return pskb;
> +}
> +
> +static void flush_backlog(void *arg)
> +{
> +	struct softnet_data *sd = &__get_cpu_var(softnet_data);
> +	struct sk_buff **tailp;
> +
> +	rps_lock(sd);
> +	tailp = __flush_backlog(sd, &sd->input_pkt_queue_head, arg);
> +	sd->input_pkt_queue_tailp = tailp;
> +	__flush_backlog(sd, &sd->process_queue, arg);
>  	rps_unlock(sd);
>  }
>  
> @@ -3249,24 +3270,39 @@ static int process_backlog(struct napi_struct *napi, int quota)
>  	struct softnet_data *sd = &__get_cpu_var(softnet_data);
>  
>  	napi->weight = weight_p;
> +	local_irq_disable();
>  	do {
>  		struct sk_buff *skb;
>  
> -		local_irq_disable();
> +		while (sd->process_queue) {
> +			skb = sd->process_queue;
> +			sd->process_queue = skb->next;
> +			local_irq_enable();
> +			__netif_receive_skb(skb);
> +			if (++work >= quota) {
> +				local_irq_disable();
> +				rps_lock(sd);
> +				goto out;
> +			}
> +			local_irq_disable();
> +		}
> +
>  		rps_lock(sd);
> -		skb = __skb_dequeue(&sd->input_pkt_queue);
> -		if (!skb) {
> +		if (sd->input_pkt_queue_head == NULL) {
>  			__napi_complete(napi);
> -			rps_unlock(sd);
> -			local_irq_enable();
>  			break;
>  		}
> -		input_queue_head_incr(sd);
> +		sd->process_queue = sd->input_pkt_queue_head;
> +		sd->input_pkt_queue_head = NULL;
> +		sd->input_pkt_queue_tailp = &sd->input_pkt_queue_head;
>  		rps_unlock(sd);
> -		local_irq_enable();
> +	} while (1);
>  
> -		__netif_receive_skb(skb);
> -	} while (++work < quota);
> +out:
> +	sd->input_pkt_queue_len -= work;
> +	input_queue_head_add(sd, work);
> +	rps_unlock(sd);
> +	local_irq_enable();
>  
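The patch's singly linked FIFO relies on the pointer-to-pointer tail idiom: input_pkt_queue_tailp always points at the location that will receive the next packet's ->next link, so enqueueing is O(1) and handing the whole pending list over to process_queue is a couple of pointer assignments. Below is a minimal userspace sketch of that idiom, purely an editorial illustration: struct pkt, struct backlog and the helper names are stand-ins, not the kernel's types.

#include <stdio.h>

struct pkt {                       /* stand-in for struct sk_buff */
	struct pkt *next;
	int id;
};

struct backlog {                   /* stand-in for the new softnet_data fields */
	struct pkt *head;          /* like input_pkt_queue_head */
	struct pkt **tailp;        /* like input_pkt_queue_tailp */
	unsigned int len;          /* like input_pkt_queue_len */
};

static void backlog_init(struct backlog *b)
{
	b->head = NULL;
	b->tailp = &b->head;       /* empty queue: tail pointer targets head */
	b->len = 0;
}

/* O(1) enqueue at the tail; no list walk, no prev pointer needed. */
static void backlog_enqueue(struct backlog *b, struct pkt *p)
{
	p->next = NULL;
	*b->tailp = p;
	b->tailp = &p->next;
	b->len++;
}

/* Detach the whole pending list in one step, the way process_backlog()
 * moves input_pkt_queue onto process_queue. */
static struct pkt *backlog_splice_all(struct backlog *b)
{
	struct pkt *list = b->head;

	b->head = NULL;
	b->tailp = &b->head;
	return list;               /* ->len is untouched, as the patch defers it */
}

int main(void)
{
	struct backlog b;
	struct pkt pkts[3] = { { .id = 1 }, { .id = 2 }, { .id = 3 } };
	struct pkt *p;
	int i;

	backlog_init(&b);
	for (i = 0; i < 3; i++)
		backlog_enqueue(&b, &pkts[i]);

	for (p = backlog_splice_all(&b); p != NULL; p = p->next)
		printf("packet %d (len still %u)\n", p->id, b.len);
	return 0;
}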



Please reorder things better.

Most likely this function is called for one packet.

In your version you take the rps_lock()/rps_unlock() path twice, so
it'll be slower:

Once to transfer one list to the process list.

Once to be able to do the post processing at the 'out:' label.
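
One way to illustrate the reordering being asked for (an editorial sketch only, not Eric's code and not kernel code): do the splice, the length bookkeeping and a quota cap inside a single locked section, and process packets with the lock dropped, so a poll that finds one pending packet locks exactly once. A pthread mutex stands in for rps_lock(); NAPI completion, IRQ handling and the deferred input_pkt_queue_len decrement are left out, and all names here are invented for the example.

#include <pthread.h>
#include <stdio.h>

struct pkt {
	struct pkt *next;
	int id;
};

struct backlog {
	pthread_mutex_t lock;      /* stand-in for input_pkt_queue_lock / rps_lock() */
	struct pkt *head;          /* shared pending list (like input_pkt_queue_head) */
	struct pkt **tailp;
	unsigned int len;          /* pending-packet count */
	struct pkt *process;       /* private list being worked on (like process_queue) */
	unsigned int head_count;   /* stand-in for input_queue_head */
};

/* Producer side: O(1) tail enqueue under the lock. */
static void backlog_enqueue(struct backlog *b, struct pkt *p)
{
	pthread_mutex_lock(&b->lock);
	p->next = NULL;
	*b->tailp = p;
	b->tailp = &p->next;
	b->len++;
	pthread_mutex_unlock(&b->lock);
}

/* Consumer side: one locked section per refill.  The splice, the length
 * bookkeeping and the quota capping all happen inside that section, so a
 * poll that finds a single pending packet locks exactly once. */
static int poll_backlog(struct backlog *b, int quota)
{
	int work = 0;

	while (work < quota) {
		struct pkt *p;

		/* Drain the private process list with the lock dropped;
		 * only this poller ever touches it. */
		while ((p = b->process) != NULL) {
			b->process = p->next;
			printf("processing packet %d\n", p->id);
			b->head_count++;
			if (++work >= quota)
				return work;    /* quota hit: leftovers stay queued */
		}

		pthread_mutex_lock(&b->lock);
		if (b->head == NULL) {
			pthread_mutex_unlock(&b->lock);
			break;                  /* nothing pending: we are done */
		}
		/* If everything pending fits in the remaining quota, cap the
		 * quota so the next drain pass can return without relocking. */
		if (b->len <= (unsigned int)(quota - work))
			quota = work + (int)b->len;
		b->process = b->head;           /* splice pending -> process */
		b->head = NULL;
		b->tailp = &b->head;
		b->len = 0;
		pthread_mutex_unlock(&b->lock);
	}
	return work;
}

int main(void)
{
	struct backlog b = { .lock = PTHREAD_MUTEX_INITIALIZER };
	struct pkt pkts[2] = { { .id = 1 }, { .id = 2 } };

	b.tailp = &b.head;
	backlog_enqueue(&b, &pkts[0]);
	backlog_enqueue(&b, &pkts[1]);
	printf("handled %d packets\n", poll_backlog(&b, 64));
	return 0;
}

The point of the quota cap is that a small batch drains completely on the next pass and returns without re-entering the locked section, which is exactly the second lock/unlock round-trip the posted version pays for a single packet.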



