[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <67690268-0bd9-688e-8bf1-8df28351682e@bytedance.com>
Date: Fri, 22 Apr 2022 16:32:40 +0800
From: zhenwei pi <pizhenwei@...edance.com>
To: Jason Wang <jasowang@...hat.com>, arei.gonglei@...wei.com,
mst@...hat.com
Cc: herbert@...dor.apana.org.au, linux-kernel@...r.kernel.org,
virtualization@...ts.linux-foundation.org,
linux-crypto@...r.kernel.org, helei.sig11@...edance.com,
davem@...emloft.net
Subject: Re: Re: [PATCH v3 2/5] virtio-crypto: wait ctrl queue instead of busy
polling
On 4/22/22 15:46, Jason Wang wrote:
>
> 在 2022/4/21 18:40, zhenwei pi 写道:
>> Originally, after submitting request into virtio crypto control
>> queue, the guest side polls the result from the virt queue. This
>> works like following:
>> CPU0 CPU1 ... CPUx CPUy
>> | | | |
>> \ \ / /
>> \--------spin_lock(&vcrypto->ctrl_lock)-------/
>> |
>> virtqueue add & kick
>> |
>> busy poll virtqueue
>> |
>> spin_unlock(&vcrypto->ctrl_lock)
>> ...
>>
>> There are two problems:
>> 1, The queue depth is always 1, the performance of a virtio crypto
>> device gets limited. Multi user processes share a single control
>> queue, and hit spin lock race from control queue. Test on Intel
>> Platinum 8260, a single worker gets ~35K/s create/close session
>> operations, and 8 workers get ~40K/s operations with 800% CPU
>> utilization.
>> 2, The control request is supposed to get handled immediately, but
>> in the current implementation of QEMU(v6.2), the vCPU thread kicks
>> another thread to do this work, the latency also gets unstable.
>> Tracking latency of virtio_crypto_alg_akcipher_close_session in 5s:
>> usecs : count distribution
>> 0 -> 1 : 0 | |
>> 2 -> 3 : 7 | |
>> 4 -> 7 : 72 | |
>> 8 -> 15 : 186485 |************************|
>> 16 -> 31 : 687 | |
>> 32 -> 63 : 5 | |
>> 64 -> 127 : 3 | |
>> 128 -> 255 : 1 | |
>> 256 -> 511 : 0 | |
>> 512 -> 1023 : 0 | |
>> 1024 -> 2047 : 0 | |
>> 2048 -> 4095 : 0 | |
>> 4096 -> 8191 : 0 | |
>> 8192 -> 16383 : 2 | |
>> This means that a CPU may hold vcrypto->ctrl_lock as long as
>> 8192~16383us.
>>
>> To improve the performance of control queue, a request on control
>> queue waits
>> completion instead of busy polling to reduce lock racing, and gets
>> completed by
>> control queue callback.
>> CPU0 CPU1 ... CPUx CPUy
>> | | | |
>> \ \ / /
>> \--------spin_lock(&vcrypto->ctrl_lock)-------/
>> |
>> virtqueue add & kick
>> |
>> ---------spin_unlock(&vcrypto->ctrl_lock)------
>> / / \ \
>> | | | |
>> wait wait wait wait
>>
>> Test this patch, the guest side get ~200K/s operations with 300% CPU
>> utilization.
>>
>> Cc: Michael S. Tsirkin <mst@...hat.com>
>> Cc: Jason Wang <jasowang@...hat.com>
>> Cc: Gonglei <arei.gonglei@...wei.com>
>> Signed-off-by: zhenwei pi <pizhenwei@...edance.com>
>> ---
>> drivers/crypto/virtio/virtio_crypto_common.c | 42 +++++++++++++++-----
>> drivers/crypto/virtio/virtio_crypto_common.h | 8 ++++
>> drivers/crypto/virtio/virtio_crypto_core.c | 2 +-
>> 3 files changed, 41 insertions(+), 11 deletions(-)
>>
>> diff --git a/drivers/crypto/virtio/virtio_crypto_common.c
>> b/drivers/crypto/virtio/virtio_crypto_common.c
>> index e65125a74db2..93df73c40dd3 100644
>> --- a/drivers/crypto/virtio/virtio_crypto_common.c
>> +++ b/drivers/crypto/virtio/virtio_crypto_common.c
>> @@ -8,14 +8,21 @@
>> #include "virtio_crypto_common.h"
>> +static void virtio_crypto_ctrlq_callback(struct
>> virtio_crypto_ctrl_request *vc_ctrl_req)
>> +{
>> + complete(&vc_ctrl_req->compl);
>> +}
>> +
>> int virtio_crypto_ctrl_vq_request(struct virtio_crypto *vcrypto,
>> struct scatterlist *sgs[],
>> unsigned int out_sgs, unsigned int in_sgs,
>> struct virtio_crypto_ctrl_request *vc_ctrl_req)
>> {
>> int err;
>> - unsigned int inlen;
>> unsigned long flags;
>> + init_completion(&vc_ctrl_req->compl);
>> + vc_ctrl_req->ctrl_cb = virtio_crypto_ctrlq_callback;
>
>
> Is there a chance that the cb would not be virtio_crypto_ctrlq_callback()?
>
Yes, it's the only callback function used for control queue, removing
this and calling virtio_crypto_ctrlq_callback directly in
virtcrypto_ctrlq_callback seems better. I'll fix this in the next version.
>
>> +
>> spin_lock_irqsave(&vcrypto->ctrl_lock, flags);
>> err = virtqueue_add_sgs(vcrypto->ctrl_vq, sgs, out_sgs, in_sgs,
>> vc_ctrl_req, GFP_ATOMIC);
>> if (err < 0) {
>> @@ -24,16 +31,31 @@ int virtio_crypto_ctrl_vq_request(struct
>> virtio_crypto *vcrypto, struct scatterl
>> }
>> virtqueue_kick(vcrypto->ctrl_vq);
>> -
>> - /*
>> - * Trapping into the hypervisor, so the request should be
>> - * handled immediately.
>> - */
>> - while (!virtqueue_get_buf(vcrypto->ctrl_vq, &inlen) &&
>> - !virtqueue_is_broken(vcrypto->ctrl_vq))
>> - cpu_relax();
>> -
>> spin_unlock_irqrestore(&vcrypto->ctrl_lock, flags);
>> + wait_for_completion(&vc_ctrl_req->compl);
>> +
>> return 0;
>> }
>> +
>> +void virtcrypto_ctrlq_callback(struct virtqueue *vq)
>> +{
>> + struct virtio_crypto *vcrypto = vq->vdev->priv;
>> + struct virtio_crypto_ctrl_request *vc_ctrl_req;
>> + unsigned long flags;
>> + unsigned int len;
>> +
>> + spin_lock_irqsave(&vcrypto->ctrl_lock, flags);
>> + do {
>> + virtqueue_disable_cb(vq);
>> + while ((vc_ctrl_req = virtqueue_get_buf(vq, &len)) != NULL) {
>> + spin_unlock_irqrestore(&vcrypto->ctrl_lock, flags);
>> + if (vc_ctrl_req->ctrl_cb)
>> + vc_ctrl_req->ctrl_cb(vc_ctrl_req);
>> + spin_lock_irqsave(&vcrypto->ctrl_lock, flags);
>> + }
>> + if (unlikely(virtqueue_is_broken(vq)))
>> + break;
>> + } while (!virtqueue_enable_cb(vq));
>> + spin_unlock_irqrestore(&vcrypto->ctrl_lock, flags);
>> +}
>> diff --git a/drivers/crypto/virtio/virtio_crypto_common.h
>> b/drivers/crypto/virtio/virtio_crypto_common.h
>> index d2a20fe6e13e..25b4f22e8605 100644
>> --- a/drivers/crypto/virtio/virtio_crypto_common.h
>> +++ b/drivers/crypto/virtio/virtio_crypto_common.h
>> @@ -81,6 +81,10 @@ struct virtio_crypto_sym_session_info {
>> __u64 session_id;
>> };
>> +struct virtio_crypto_ctrl_request;
>> +typedef void (*virtio_crypto_ctrl_callback)
>> + (struct virtio_crypto_ctrl_request *vc_ctrl_req);
>> +
>> /*
>> * Note: there are padding fields in request, clear them to zero
>> before sending to host,
>> * Ex,
>> virtio_crypto_ctrl_request::ctrl::u::destroy_session::padding[48]
>> @@ -89,6 +93,8 @@ struct virtio_crypto_ctrl_request {
>> struct virtio_crypto_op_ctrl_req ctrl;
>> struct virtio_crypto_session_input input;
>> struct virtio_crypto_inhdr ctrl_status;
>> + virtio_crypto_ctrl_callback ctrl_cb;
>> + struct completion compl;
>> };
>> struct virtio_crypto_request;
>> @@ -141,7 +147,9 @@ void virtio_crypto_skcipher_algs_unregister(struct
>> virtio_crypto *vcrypto);
>> int virtio_crypto_akcipher_algs_register(struct virtio_crypto
>> *vcrypto);
>> void virtio_crypto_akcipher_algs_unregister(struct virtio_crypto
>> *vcrypto);
>> +void virtcrypto_ctrlq_callback(struct virtqueue *vq);
>> int virtio_crypto_ctrl_vq_request(struct virtio_crypto *vcrypto,
>> struct scatterlist *sgs[],
>> unsigned int out_sgs, unsigned int in_sgs,
>> struct virtio_crypto_ctrl_request *vc_ctrl_req);
>> +
>
>
> Unnecessary changes.
>
> Thanks
>
>
>> #endif /* _VIRTIO_CRYPTO_COMMON_H */
>> diff --git a/drivers/crypto/virtio/virtio_crypto_core.c
>> b/drivers/crypto/virtio/virtio_crypto_core.c
>> index c6f482db0bc0..e668d4b1bc6a 100644
>> --- a/drivers/crypto/virtio/virtio_crypto_core.c
>> +++ b/drivers/crypto/virtio/virtio_crypto_core.c
>> @@ -73,7 +73,7 @@ static int virtcrypto_find_vqs(struct virtio_crypto
>> *vi)
>> goto err_names;
>> /* Parameters for control virtqueue */
>> - callbacks[total_vqs - 1] = NULL;
>> + callbacks[total_vqs - 1] = virtcrypto_ctrlq_callback;
>> names[total_vqs - 1] = "controlq";
>> /* Allocate/initialize parameters for data virtqueues */
>
--
zhenwei pi
Powered by blists - more mailing lists