Date:	Sun, 19 Feb 2012 16:51:01 +0200
From:	"Michael S. Tsirkin" <mst@...hat.com>
To:	Anthony Liguori <aliguori@...ibm.com>
Cc:	netdev@...r.kernel.org, Tom Lendacky <toml@...ibm.com>,
	Cristian Viana <vianac@...ibm.com>
Subject: Re: [PATCH 2/2] vhost-net: add a spin_threshold parameter

On Fri, Feb 17, 2012 at 05:02:06PM -0600, Anthony Liguori wrote:
> With workloads that are dominated by very high rates of small packets, we see
> considerable overhead in virtio notifications.
> 
> The best strategy we have come up with so far to deal with this is adaptive
> polling.  This patch simply adds the infrastructure needed to experiment
> with polling strategies.  It is not meant for inclusion.
> 
> Here are the results with various polling values.  Spinning is not currently
> a net win due to the high mutex contention caused by the broadcast wakeup.
> With a patch that attempts a targeted wakeup instead, we see up to 170,000+
> transactions per second with 60 TCP_RR instances.
> 
> N  Baseline	Spin 0		Spin 1000	Spin 5000	Spin 5000 / Baseline
> 
> TCP_RR
> 
> 1  9,639.66	10,164.06	9,825.43	9,827.45	101.95%
> 10 62,819.55	54,059.78	63,114.30	60,767.23	96.73%
> 30 84,715.60	131,241.86	120,922.38	89,776.39	105.97%
> 60 124,614.71	148,720.66	158,678.08	141,400.05	113.47%
> 
> UDP_RR
> 
> 1  9,652.50	10,343.72	9,493.95	9,569.54	99.14%
> 10 53,830.26	58,235.90	50,145.29	48,820.53	90.69%
> 30 89,471.01	97,634.53	95,108.34	91,263.65	102.00%
> 60 103,640.59	164,035.01	157,002.22	128,646.73	124.13%
> 
> TCP_STREAM
> 1  2,622.63	2,610.71	2,688.49	2,678.61	102.13%
> 4  4,928.02	4,812.05	4,971.00	5,104.57	103.58%
> 
> 1  5,639.89	5,751.28	5,819.81	5,593.62	99.18%
> 4  5,874.72	6,575.55	6,324.87	6,502.33	110.68%
> 
> 1  6,257.42	7,655.22	7,610.52	7,424.74	118.65%
> 4  5,370.78	6,044.83	5,784.23	6,209.93	115.62%
> 
> 1  6,346.63	7,267.44	7,567.39	7,677.93	120.98%
> 4  5,198.02	5,657.12	5,528.94	5,792.42	111.44%
> 
> TCP_MAERTS
> 
> 1  2,091.38	1,765.62	2,142.56	2,312.94	110.59%
> 4  5,319.52	5,619.49	5,544.50	5,645.81	106.13%
> 
> 1  7,030.66	7,593.61	7,575.67	7,622.07	108.41%
> 4  9,040.53	7,275.84	7,322.07	6,681.34	73.90%
> 
> 1  9,160.93	9,318.15	9,065.82	8,586.82	93.73%
> 4  9,372.49	8,875.63	8,959.03	9,056.07	96.62%
> 
> 1  9,183.28	9,134.02	8,945.12	8,657.72	94.28%
> 4  9,377.17	8,877.52	8,959.54	9,071.53	96.74%
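
The trailing column looks like Spin 5000 relative to Baseline; the snippet
below just recomputes it from the TCP_RR rows above (the other sections come
out the same way), to make the comparison explicit.  Everything in it is taken
from the table, nothing else is assumed:

/* Recompute the last column: Spin 5000 as a percentage of Baseline.
 * Figures are the TCP_RR transactions/sec quoted above. */
#include <stdio.h>

int main(void)
{
	static const struct { int n; double base, spin5000; } tcp_rr[] = {
		{  1,   9639.66,   9827.45 },
		{ 10,  62819.55,  60767.23 },
		{ 30,  84715.60,  89776.39 },
		{ 60, 124614.71, 141400.05 },
	};
	unsigned i;

	for (i = 0; i < sizeof(tcp_rr) / sizeof(tcp_rr[0]); i++)
		printf("N=%-2d  %6.2f%%\n", tcp_rr[i].n,
		       100.0 * tcp_rr[i].spin5000 / tcp_rr[i].base);
	return 0;
}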

An obvious question would be how the BW divided by CPU numbers are
affected, i.e. throughput normalized by the host CPU it costs.
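
(A crude way to see it: divide each rate by the measured host CPU
utilization.  The throughput figures below are the 60-instance TCP_RR
Baseline and Spin 5000 numbers from the table; the CPU percentages are
hypothetical placeholders, only the arithmetic is the point.)

/* Sketch: throughput per unit of host CPU.  The %CPU values here are
 * made up for illustration; the real ones have to come from measurement. */
#include <stdio.h>

int main(void)
{
	double base_tps = 124614.71, base_cpu = 70.0;  /* hypothetical %CPU */
	double spin_tps = 141400.05, spin_cpu = 95.0;  /* hypothetical %CPU */

	printf("baseline : %.0f trans/s per %%CPU\n", base_tps / base_cpu);
	printf("spin 5000: %.0f trans/s per %%CPU\n", spin_tps / spin_cpu);
	/* If spinning costs proportionally more CPU than the throughput it
	 * buys, this normalized number drops even though raw TPS went up. */
	return 0;
}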

> Cc: Tom Lendacky <toml@...ibm.com>
> Cc: Cristian Viana <vianac@...ibm.com>
> Signed-off-by: Anthony Liguori <aliguori@...ibm.com>
> ---
>  drivers/vhost/net.c |   14 ++++++++++++++
>  1 files changed, 14 insertions(+), 0 deletions(-)
> 
> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
> index 47175cd..e9e5866 100644
> --- a/drivers/vhost/net.c
> +++ b/drivers/vhost/net.c
> @@ -37,6 +37,10 @@ static int workers = 2;
>  module_param(workers, int, 0444);
>  MODULE_PARM_DESC(workers, "Set the number of worker threads");
>  
> +static ulong spin_threshold = 0;
> +module_param(spin_threshold, ulong, 0444);
> +MODULE_PARM_DESC(spin_threshold, "The polling threshold for the tx queue");
> +
>  /* Max number of bytes transferred before requeueing the job.
>   * Using this limit prevents one virtqueue from starving others. */
>  #define VHOST_NET_WEIGHT 0x80000
> @@ -65,6 +69,7 @@ struct vhost_net {
>  	 * We only do this when socket buffer fills up.
>  	 * Protected by tx vq lock. */
>  	enum vhost_net_poll_state tx_poll_state;
> +	size_t spin_threshold;
>  };
>  
>  static bool vhost_sock_zcopy(struct socket *sock)
> @@ -149,6 +154,7 @@ static void handle_tx(struct vhost_net *net)
>  	size_t hdr_size;
>  	struct socket *sock;
>  	struct vhost_ubuf_ref *uninitialized_var(ubufs);
> +	size_t spin_count;
>  	bool zcopy;
>  
>  	/* TODO: check that we are running from vhost_worker? */
> @@ -172,6 +178,7 @@ static void handle_tx(struct vhost_net *net)
>  	hdr_size = vq->vhost_hlen;
>  	zcopy = vhost_sock_zcopy(sock);
>  
> +	spin_count = 0;
>  	for (;;) {
>  		/* Release DMAs done buffers first */
>  		if (zcopy)
> @@ -205,9 +212,15 @@ static void handle_tx(struct vhost_net *net)
>  				set_bit(SOCK_ASYNC_NOSPACE, &sock->flags);
>  				break;
>  			}
> +			if (spin_count < net->spin_threshold) {
> +				spin_count++;
> +				continue;
> +			}
>  			if (unlikely(vhost_enable_notify(&net->dev, vq))) {
>  				vhost_disable_notify(&net->dev, vq);
>  				continue;
> +			} else {
> +				spin_count = 0;
>  			}
>  			break;
>  		}
> @@ -506,6 +519,7 @@ static int vhost_net_open(struct inode *inode, struct file *f)
>  		return -ENOMEM;
>  
>  	dev = &n->dev;
> +	n->spin_threshold = spin_threshold;
>  	n->vqs[VHOST_NET_VQ_TX].handle_kick = handle_tx_kick;
>  	n->vqs[VHOST_NET_VQ_RX].handle_kick = handle_rx_kick;
>  	r = vhost_dev_init(dev, n->vqs, workers, VHOST_NET_VQ_MAX);
> -- 
> 1.7.4.1
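
For anyone who wants to play with the idea outside the kernel, here is a toy
userspace model of the spin-then-notify flow the patch adds to handle_tx().
The helper names (ring_empty, enable_notify, ...) are illustrative stand-ins,
not vhost code; only the control flow mirrors the patch.

#include <stdbool.h>
#include <stdio.h>

static unsigned long spin_threshold = 1000;  /* stand-in for the module parameter */
static int pending = 3;                      /* stand-in for queued tx buffers     */

static bool ring_empty(void)   { return pending == 0; }
static void transmit_one(void) { pending--; printf("sent, %d left\n", pending); }

/* Arm notifications, then recheck the ring; true means new buffers raced in
 * and we should keep processing instead of stopping. */
static bool enable_notify(void)  { return !ring_empty(); }
static void disable_notify(void) { }

int main(void)
{
	unsigned long spin_count = 0;

	for (;;) {
		if (ring_empty()) {
			if (spin_count < spin_threshold) {
				spin_count++;   /* busy-poll instead of paying */
				continue;       /* for a notification cycle    */
			}
			if (enable_notify()) {
				disable_notify();
				continue;       /* raced with the producer */
			}
			spin_count = 0;         /* as in the patch: reset only */
			break;                  /* on the give-up path         */
		}
		transmit_one();
	}
	return 0;
}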
