lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Sun, 17 Aug 2014 12:20:56 +0200
From:	"Michael S. Tsirkin" <mst@...hat.com>
To:	Jason Wang <jasowang@...hat.com>
Cc:	kvm@...r.kernel.org, virtualization@...ts.linux-foundation.org,
	netdev@...r.kernel.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH net-next] vhost_net: stop rx net polling when possible

On Fri, Aug 15, 2014 at 11:40:08AM +0800, Jason Wang wrote:
> After rx vq was enabled, we never stop polling its socket. This is sub optimal
> when may lead unnecessary wake-ups after the rx net work has already been
> queued. This could be optimized by stopping polling the rx net sock when
> processing both rx and tx and restart it afterward. This could save unnecessary
> wake-ups and even unnecessary spin locks acquiring with the help of commit
> 9e641bdcfa4ef4d6e2fbaa59c1be0ad5d1551fd5 "net-tun: restructure tun_do_read for
> better sleep/wakeup efficiency".

OK so the point is to avoid expensive wake_up_process calls?
It's a bit unfortunate that we are adding/removing things from wait
queue which certainly does take extra spin-locks.




> Test shows significant CPU% savings during almost all the cases:
> 
> Guest rx stream:
> size(B)/sessions/throughput/cpu/normalized thru/
> 64/1/+0.7773%   -8.6224%     +10.2866%
> 64/2/+0.6335%   -13.9109%    +16.8946%
> 64/4/-0.8182%   -14.8336%    +16.4565%
> 64/8/+0.4830%   -13.7675%    +16.5256%
> 256/1/-7.0963%  -12.6880%    +6.4043%
> 256/2/-1.3982%  -11.5424%    +11.4678%
> 256/4/-0.0350%  -11.8323%    +13.3806%
> 256/8/-1.5830%  -12.7693%    +12.8238%
> 1024/1/-7.4895% -19.1449%       +14.4152%
> 1024/2/-7.4575% -19.4018%       +14.8195%
> 1024/4/-0.3881% -9.1183%        +9.6061%
> 1024/8/+0.4713% -11.0155%       +12.9087%
> 4096/1/+0.8786%  -8.4050%        +10.1355%
> 4096/2/+0.0098%  -15.3094%       +18.0885%
> 4096/4/+0.0445%  -10.8247%       +12.1886%
> 4096/8/-2.1317%  -12.5111%       +11.8637%
> 16384/1/-0.0008% -6.1891%        +6.5966%
> 16384/2/-0.0117% -16.2716%       +19.4198%
> 16384/4/+0.0001% -5.9197%        +6.2923%
> 16384/8/+0.0173% -7.6681%        +8.3236%
> 65535/1/+0.0011% -10.3594%       +11.5578%
> 65535/2/-0.4108%  -14.4304%       +16.3838%
> 65535/4/+0.0011%  -10.3594%       +11.5578%
> 65535/8/-0.4108%  -14.4304%       +16.3838%
> 
> Guest tx stream:
> size(B)/sessions/throughput/cpu/normalized thru/
> 64/1/-0.6228%     -2.1936%     +1.6060%
> 64/2/+0.8646%     -3.5063%     +4.5297%
> 64/4/+0.8733%     -3.2495%     +4.2613%
> 64/8/+1.4290%     -3.5593%     +5.1724%
> 256/1/+7.2098%    -3.1122%     +10.6535%
> 256/2/-10.1408%   -6.8230%     -3.5607%
> 256/4/-11.3531%   -6.7085%     -4.9785%
> 256/8/-10.2723%   -6.5628%     -3.9701%
> 1024/1/-18.9329%  -13.6162%    -6.1547%
> 1024/2/-0.3728%   -1.3181%     +0.9580%
> 1024/4/+0.0125%   -3.6338%     +3.7838%
> 1024/8/-0.0030%   -2.7282%     +2.8017%
> 4096/1/+16.9367%  -1.9435%     +19.2543%
> 4096/2/+0.0121%   -6.1682%     +6.5866%
> 4096/4/+0.0019%   -3.8510%     +4.0072%
> 4096/8/-0.0222%   -4.1368%     +4.2922%
> 16384/1/-0.0026%  -8.6892%     +9.5132%
> 16384/2/-0.0012%  -10.1676%    +11.3171%
> 16384/4/+0.0196%  -1.2551%     +1.2908%
> 16384/8/+0.1303%  -3.2634%     +3.5082%
> 65535/1/+0.0019%  -3.4694%     +3.5961%
> 65535/2/-0.0003%  -0.7635%     +0.7690%
> 65535/4/-0.0219%  -2.7875%     +2.8448%
> 65535/8/+0.1137%  -2.7922%     +2.9894%
> 
> TCP_RR:
> size(B)/sessions/throughput/cpu/normalized thru/
> 256/1/+1.9004%    -4.7985%     +7.0366%
> 256/25/-4.7366%   -11.0809%    +7.1349%
> 256/50/+3.9808%   -5.2037%     +9.6887%
> 4096/1/+2.1619%   -0.7303%     +2.9134%
> 4096/25/-13.1836% -14.7298%    +1.8134%
> 4096/50/-11.1990% -15.4763%    +5.0605%
> 
> Signed-off-by: Jason Wang <jasowang@...hat.com>


Could you split RX/TX parts out please, and benchmark separately?

They are really independent.




> ---
>  drivers/vhost/net.c | 26 +++++++++++++++++++++-----
>  1 file changed, 21 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
> index 8dae2f7..d4a9742 100644
> --- a/drivers/vhost/net.c
> +++ b/drivers/vhost/net.c
> @@ -334,6 +334,8 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success)
>  static void handle_tx(struct vhost_net *net)
>  {
>  	struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];
> +	struct vhost_virtqueue *rx_vq = &net->vqs[VHOST_NET_VQ_RX].vq;
> +	struct vhost_poll *rx_poll = &net->poll[VHOST_NET_VQ_RX];
>  	struct vhost_virtqueue *vq = &nvq->vq;
>  	unsigned out, in, s;
>  	int head;
> @@ -348,15 +350,18 @@ static void handle_tx(struct vhost_net *net)
>  	size_t len, total_len = 0;
>  	int err;
>  	size_t hdr_size;
> -	struct socket *sock;
> +	struct socket *sock, *rxsock;
>  	struct vhost_net_ubuf_ref *uninitialized_var(ubufs);
> -	bool zcopy, zcopy_used;
> +	bool zcopy, zcopy_used, poll = false;
>  
>  	mutex_lock(&vq->mutex);
> +	mutex_lock(&rx_vq->mutex);
>  	sock = vq->private_data;
> +	rxsock = rx_vq->private_data;
>  	if (!sock)
>  		goto out;
>  
> +	vhost_poll_stop(rx_poll);
>  	vhost_disable_notify(&net->dev, vq);
>  
>  	hdr_size = nvq->vhost_hlen;
> @@ -451,11 +456,17 @@ static void handle_tx(struct vhost_net *net)
>  		total_len += len;
>  		vhost_net_tx_packet(net);
>  		if (unlikely(total_len >= VHOST_NET_WEIGHT)) {
> -			vhost_poll_queue(&vq->poll);
> +			poll = true;
>  			break;
>  		}
>  	}
> +
> +	if (rxsock)
> +		vhost_poll_start(rx_poll, rxsock->file);
> +	if (poll)
> +		vhost_poll_queue(&vq->poll);
>  out:
> +	mutex_unlock(&rx_vq->mutex);
>  	mutex_unlock(&vq->mutex);
>  }
>  
> @@ -554,6 +565,7 @@ err:
>  static void handle_rx(struct vhost_net *net)
>  {
>  	struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_RX];
> +	struct vhost_poll *poll = &net->poll[VHOST_NET_VQ_RX];
>  	struct vhost_virtqueue *vq = &nvq->vq;
>  	unsigned uninitialized_var(in), log;
>  	struct vhost_log *vq_log;
> @@ -580,6 +592,8 @@ static void handle_rx(struct vhost_net *net)
>  	sock = vq->private_data;
>  	if (!sock)
>  		goto out;
> +
> +	vhost_poll_stop(poll);
>  	vhost_disable_notify(&net->dev, vq);
>  
>  	vhost_hlen = nvq->vhost_hlen;
> @@ -660,10 +674,12 @@ static void handle_rx(struct vhost_net *net)
>  			vhost_log_write(vq, vq_log, log, vhost_len);
>  		total_len += vhost_len;
>  		if (unlikely(total_len >= VHOST_NET_WEIGHT)) {
> -			vhost_poll_queue(&vq->poll);
> -			break;
> +			vhost_poll_queue(poll);
> +			goto out;
>  		}
>  	}
> +
> +	vhost_poll_start(poll, sock->file);
>  out:
>  	mutex_unlock(&vq->mutex);
>  }
> -- 
> 1.9.1
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ