lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Fri, 22 Apr 2022 15:46:51 +0800
From:   Jason Wang <jasowang@...hat.com>
To:     zhenwei pi <pizhenwei@...edance.com>, arei.gonglei@...wei.com,
        mst@...hat.com
Cc:     herbert@...dor.apana.org.au, linux-kernel@...r.kernel.org,
        virtualization@...ts.linux-foundation.org,
        linux-crypto@...r.kernel.org, helei.sig11@...edance.com,
        davem@...emloft.net
Subject: Re: [PATCH v3 2/5] virtio-crypto: wait ctrl queue instead of busy
 polling


在 2022/4/21 18:40, zhenwei pi 写道:
> Originally, after submitting request into virtio crypto control
> queue, the guest side polls the result from the virt queue. This
> works like following:
>      CPU0   CPU1               ...             CPUx  CPUy
>       |      |                                  |     |
>       \      \                                  /     /
>        \--------spin_lock(&vcrypto->ctrl_lock)-------/
>                             |
>                   virtqueue add & kick
>                             |
>                    busy poll virtqueue
>                             |
>                spin_unlock(&vcrypto->ctrl_lock)
>                            ...
>
> There are two problems:
> 1, The queue depth is always 1, the performance of a virtio crypto
>     device gets limited. Multi user processes share a single control
>     queue, and hit spin lock race from control queue. Test on Intel
>     Platinum 8260, a single worker gets ~35K/s create/close session
>     operations, and 8 workers get ~40K/s operations with 800% CPU
>     utilization.
> 2, The control request is supposed to get handled immediately, but
>     in the current implementation of QEMU(v6.2), the vCPU thread kicks
>     another thread to do this work, the latency also gets unstable.
>     Tracking latency of virtio_crypto_alg_akcipher_close_session in 5s:
>          usecs               : count     distribution
>           0 -> 1          : 0        |                        |
>           2 -> 3          : 7        |                        |
>           4 -> 7          : 72       |                        |
>           8 -> 15         : 186485   |************************|
>          16 -> 31         : 687      |                        |
>          32 -> 63         : 5        |                        |
>          64 -> 127        : 3        |                        |
>         128 -> 255        : 1        |                        |
>         256 -> 511        : 0        |                        |
>         512 -> 1023       : 0        |                        |
>        1024 -> 2047       : 0        |                        |
>        2048 -> 4095       : 0        |                        |
>        4096 -> 8191       : 0        |                        |
>        8192 -> 16383      : 2        |                        |
>     This means that a CPU may hold vcrypto->ctrl_lock as long as 8192~16383us.
>
> To improve the performance of control queue, a request on control queue waits
> completion instead of busy polling to reduce lock racing, and gets completed by
> control queue callback.
>      CPU0   CPU1               ...             CPUx  CPUy
>       |      |                                  |     |
>       \      \                                  /     /
>        \--------spin_lock(&vcrypto->ctrl_lock)-------/
>                             |
>                   virtqueue add & kick
>                             |
>        ---------spin_unlock(&vcrypto->ctrl_lock)------
>       /      /                                  \     \
>       |      |                                  |     |
>      wait   wait                               wait  wait
>
> Test this patch, the guest side get ~200K/s operations with 300% CPU
> utilization.
>
> Cc: Michael S. Tsirkin <mst@...hat.com>
> Cc: Jason Wang <jasowang@...hat.com>
> Cc: Gonglei <arei.gonglei@...wei.com>
> Signed-off-by: zhenwei pi <pizhenwei@...edance.com>
> ---
>   drivers/crypto/virtio/virtio_crypto_common.c | 42 +++++++++++++++-----
>   drivers/crypto/virtio/virtio_crypto_common.h |  8 ++++
>   drivers/crypto/virtio/virtio_crypto_core.c   |  2 +-
>   3 files changed, 41 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/crypto/virtio/virtio_crypto_common.c b/drivers/crypto/virtio/virtio_crypto_common.c
> index e65125a74db2..93df73c40dd3 100644
> --- a/drivers/crypto/virtio/virtio_crypto_common.c
> +++ b/drivers/crypto/virtio/virtio_crypto_common.c
> @@ -8,14 +8,21 @@
>   
>   #include "virtio_crypto_common.h"
>   
> +static void virtio_crypto_ctrlq_callback(struct virtio_crypto_ctrl_request *vc_ctrl_req)
> +{
> +	complete(&vc_ctrl_req->compl);
> +}
> +
>   int virtio_crypto_ctrl_vq_request(struct virtio_crypto *vcrypto, struct scatterlist *sgs[],
>   				  unsigned int out_sgs, unsigned int in_sgs,
>   				  struct virtio_crypto_ctrl_request *vc_ctrl_req)
>   {
>   	int err;
> -	unsigned int inlen;
>   	unsigned long flags;
>   
> +	init_completion(&vc_ctrl_req->compl);
> +	vc_ctrl_req->ctrl_cb =  virtio_crypto_ctrlq_callback;


Is there a chance that the cb would not be virtio_crypto_ctrlq_callback()?


> +
>   	spin_lock_irqsave(&vcrypto->ctrl_lock, flags);
>   	err = virtqueue_add_sgs(vcrypto->ctrl_vq, sgs, out_sgs, in_sgs, vc_ctrl_req, GFP_ATOMIC);
>   	if (err < 0) {
> @@ -24,16 +31,31 @@ int virtio_crypto_ctrl_vq_request(struct virtio_crypto *vcrypto, struct scatterl
>   	}
>   
>   	virtqueue_kick(vcrypto->ctrl_vq);
> -
> -	/*
> -	 * Trapping into the hypervisor, so the request should be
> -	 * handled immediately.
> -	 */
> -	while (!virtqueue_get_buf(vcrypto->ctrl_vq, &inlen) &&
> -		!virtqueue_is_broken(vcrypto->ctrl_vq))
> -		cpu_relax();
> -
>   	spin_unlock_irqrestore(&vcrypto->ctrl_lock, flags);
>   
> +	wait_for_completion(&vc_ctrl_req->compl);
> +
>   	return 0;
>   }
> +
> +void virtcrypto_ctrlq_callback(struct virtqueue *vq)
> +{
> +	struct virtio_crypto *vcrypto = vq->vdev->priv;
> +	struct virtio_crypto_ctrl_request *vc_ctrl_req;
> +	unsigned long flags;
> +	unsigned int len;
> +
> +	spin_lock_irqsave(&vcrypto->ctrl_lock, flags);
> +	do {
> +		virtqueue_disable_cb(vq);
> +		while ((vc_ctrl_req = virtqueue_get_buf(vq, &len)) != NULL) {
> +			spin_unlock_irqrestore(&vcrypto->ctrl_lock, flags);
> +			if (vc_ctrl_req->ctrl_cb)
> +				vc_ctrl_req->ctrl_cb(vc_ctrl_req);
> +			spin_lock_irqsave(&vcrypto->ctrl_lock, flags);
> +		}
> +		if (unlikely(virtqueue_is_broken(vq)))
> +			break;
> +	} while (!virtqueue_enable_cb(vq));
> +	spin_unlock_irqrestore(&vcrypto->ctrl_lock, flags);
> +}
> diff --git a/drivers/crypto/virtio/virtio_crypto_common.h b/drivers/crypto/virtio/virtio_crypto_common.h
> index d2a20fe6e13e..25b4f22e8605 100644
> --- a/drivers/crypto/virtio/virtio_crypto_common.h
> +++ b/drivers/crypto/virtio/virtio_crypto_common.h
> @@ -81,6 +81,10 @@ struct virtio_crypto_sym_session_info {
>   	__u64 session_id;
>   };
>   
> +struct virtio_crypto_ctrl_request;
> +typedef void (*virtio_crypto_ctrl_callback)
> +		(struct virtio_crypto_ctrl_request *vc_ctrl_req);
> +
>   /*
>    * Note: there are padding fields in request, clear them to zero before sending to host,
>    * Ex, virtio_crypto_ctrl_request::ctrl::u::destroy_session::padding[48]
> @@ -89,6 +93,8 @@ struct virtio_crypto_ctrl_request {
>   	struct virtio_crypto_op_ctrl_req ctrl;
>   	struct virtio_crypto_session_input input;
>   	struct virtio_crypto_inhdr ctrl_status;
> +	virtio_crypto_ctrl_callback ctrl_cb;
> +	struct completion compl;
>   };
>   
>   struct virtio_crypto_request;
> @@ -141,7 +147,9 @@ void virtio_crypto_skcipher_algs_unregister(struct virtio_crypto *vcrypto);
>   int virtio_crypto_akcipher_algs_register(struct virtio_crypto *vcrypto);
>   void virtio_crypto_akcipher_algs_unregister(struct virtio_crypto *vcrypto);
>   
> +void virtcrypto_ctrlq_callback(struct virtqueue *vq);
>   int virtio_crypto_ctrl_vq_request(struct virtio_crypto *vcrypto, struct scatterlist *sgs[],
>   				  unsigned int out_sgs, unsigned int in_sgs,
>   				  struct virtio_crypto_ctrl_request *vc_ctrl_req);
> +


Unnecessary changes.

Thanks


>   #endif /* _VIRTIO_CRYPTO_COMMON_H */
> diff --git a/drivers/crypto/virtio/virtio_crypto_core.c b/drivers/crypto/virtio/virtio_crypto_core.c
> index c6f482db0bc0..e668d4b1bc6a 100644
> --- a/drivers/crypto/virtio/virtio_crypto_core.c
> +++ b/drivers/crypto/virtio/virtio_crypto_core.c
> @@ -73,7 +73,7 @@ static int virtcrypto_find_vqs(struct virtio_crypto *vi)
>   		goto err_names;
>   
>   	/* Parameters for control virtqueue */
> -	callbacks[total_vqs - 1] = NULL;
> +	callbacks[total_vqs - 1] = virtcrypto_ctrlq_callback;
>   	names[total_vqs - 1] = "controlq";
>   
>   	/* Allocate/initialize parameters for data virtqueues */

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ