[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <9f9a8d39-61ca-45df-a172-02831ed536fa@nvidia.com>
Date: Fri, 30 Aug 2024 13:09:46 +0200
From: Dragos Tatulea <dtatulea@...dia.com>
To: Eugenio Perez Martin <eperezma@...hat.com>
Cc: Jason Wang <jasowang@...hat.com>, Si-Wei Liu <si-wei.liu@...cle.com>,
virtualization <virtualization@...ts.linux-foundation.org>,
Gal Pressman <gal@...dia.com>, kvm list <kvm@...r.kernel.org>,
linux-kernel <linux-kernel@...r.kernel.org>, Parav Pandit
<parav@...dia.com>, Xuan Zhuo <xuanzhuo@...ux.alibaba.com>,
Cosmin Ratiu <cratiu@...dia.com>, Michael Tsirkin <mst@...hat.com>
Subject: Re: [PATCH vhost 1/7] vdpa/mlx5: Create direct MKEYs in parallel
On 29.08.24 17:15, Eugenio Perez Martin wrote:
> On Thu, Aug 29, 2024 at 3:54 PM Dragos Tatulea <dtatulea@...dia.com> wrote:
>>
>>
>>
>> On 29.08.24 15:10, Eugenio Perez Martin wrote:
>>> On Wed, Aug 21, 2024 at 1:41 PM Dragos Tatulea <dtatulea@...dia.com> wrote:
>>>>
>>>> Use the async interface to issue MTT MKEY creation.
>>>> Extra care is taken at the allocation of FW input commands
>>>> due to the MTT tables having variable sizes depending on
>>>> MR.
>>>>
>>>> The indirect MKEY is still created synchronously at the
>>>> end as the direct MKEYs need to be filled in.
>>>>
>>>> This makes create_user_mr() 3-5x faster, depending on
>>>> the size of the MR.
>>>>
>>>> Signed-off-by: Dragos Tatulea <dtatulea@...dia.com>
>>>> Reviewed-by: Cosmin Ratiu <cratiu@...dia.com>
>>>> ---
>>>> drivers/vdpa/mlx5/core/mr.c | 118 +++++++++++++++++++++++++++++-------
>>>> 1 file changed, 96 insertions(+), 22 deletions(-)
>>>>
>>>> diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c
>>>> index 4758914ccf86..66e6a15f823f 100644
>>>> --- a/drivers/vdpa/mlx5/core/mr.c
>>>> +++ b/drivers/vdpa/mlx5/core/mr.c
>>>> @@ -49,17 +49,18 @@ static void populate_mtts(struct mlx5_vdpa_direct_mr *mr, __be64 *mtt)
>>>> }
>>>> }
>>>>
>>>> -static int create_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr)
>>>> +struct mlx5_create_mkey_mem {
>>>> + u8 out[MLX5_ST_SZ_BYTES(create_mkey_out)];
>>>> + u8 in[MLX5_ST_SZ_BYTES(create_mkey_in)];
>>>> + DECLARE_FLEX_ARRAY(__be64, mtt);
>>>
>>> I may be missing something obvious, but why do we need
>>> DECLARE_FLEX_ARRAY here? My understanding is that it is only needed in
>>> special cases like uapi headers and we can use "__be64 mtt[]" here.
>>>
>> checkpatch.pl was complaining about it because in my initial version I
>> used the "[0]" version of zero length based arrays.
>>
>> My impression was that DECLARE_FLEX_ARRAY is preferred option because it
>> triggers a compiler error if the zero lenth array is not at the end of
>> the struct. But on closer inspection I see that using the right C99
>> empty brackets notation is enough to trigger this error.
>> DECLARE_FLEX_ARRAY seems to be useful for the union case.
>>
>> I will change it in a v2.
>>
>>>> +};
>>>> +
>>>> +static void fill_create_direct_mr(struct mlx5_vdpa_dev *mvdev,
>>>> + struct mlx5_vdpa_direct_mr *mr,
>>>> + struct mlx5_create_mkey_mem *mem)
>>>> {
>>>> - int inlen;
>>>> + void *in = &mem->in;
>>>> void *mkc;
>>>> - void *in;
>>>> - int err;
>>>> -
>>>> - inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + roundup(MLX5_ST_SZ_BYTES(mtt) * mr->nsg, 16);
>>>> - in = kvzalloc(inlen, GFP_KERNEL);
>>>> - if (!in)
>>>> - return -ENOMEM;
>>>>
>>>> MLX5_SET(create_mkey_in, in, uid, mvdev->res.uid);
>>>> mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
>>>> @@ -76,18 +77,25 @@ static int create_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct
>>>> MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
>>>> get_octo_len(mr->end - mr->start, mr->log_size));
>>>> populate_mtts(mr, MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt));
>>>> - err = mlx5_vdpa_create_mkey(mvdev, &mr->mr, in, inlen);
>>>> - kvfree(in);
>>>> - if (err) {
>>>> - mlx5_vdpa_warn(mvdev, "Failed to create direct MR\n");
>>>> - return err;
>>>> - }
>>>>
>>>> - return 0;
>>>> + MLX5_SET(create_mkey_in, in, opcode, MLX5_CMD_OP_CREATE_MKEY);
>>>> + MLX5_SET(create_mkey_in, in, uid, mvdev->res.uid);
>>>> +}
>>>> +
>>>> +static void create_direct_mr_end(struct mlx5_vdpa_dev *mvdev,
>>>> + struct mlx5_vdpa_direct_mr *mr,
>>>> + struct mlx5_create_mkey_mem *mem)
>>>> +{
>>>> + u32 mkey_index = MLX5_GET(create_mkey_out, mem->out, mkey_index);
>>>> +
>>>> + mr->mr = mlx5_idx_to_mkey(mkey_index);
>>>> }
>>>>
>>>> static void destroy_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr)
>>>> {
>>>> + if (!mr->mr)
>>>> + return;
>>>> +
>>>> mlx5_vdpa_destroy_mkey(mvdev, mr->mr);
>>>> }
>>>>
>>>> @@ -179,6 +187,74 @@ static int klm_byte_size(int nklms)
>>>> return 16 * ALIGN(nklms, 4);
>>>> }
>>>>
>>>> +static int create_direct_keys(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
>>>> +{
>>>> + struct mlx5_vdpa_async_cmd *cmds = NULL;
>>>> + struct mlx5_vdpa_direct_mr *dmr;
>>>> + int err = 0;
>>>> + int i = 0;
>>>> +
>>>> + cmds = kvcalloc(mr->num_directs, sizeof(*cmds), GFP_KERNEL);
>>>> + if (!cmds)
>>>> + return -ENOMEM;
>>>
>>> Nit: this could benefit from Scope-based Cleanup Helpers [1], as it
>>> would make clear that memory is always removed at the end of the
>>> function & could prevent errors if the function is modified in the
>>> future. I'm a big fan of them so my opinion may be biased though :).
>>>
>>> It would be great to be able to remove the array members with that
>>> too, but I think the kernel doesn't have any facility for that.
>>>
>> I didn't know about those. It sounds like a good idea! I will try
>> to use them in v2.
>>
I tried for this patch: doing the DECLARE_FREE only for the cmds
array is only more confusing because the goto done still has to
happen due to cmd_mem elements being of different size. I preferred
not to mix and match.
I also tried adding a ctor/dtor with DECLARE_CLASS that wraps both
of these resources: the problem there is that the ctor can't fail
so the allocation still has to happen in this function but
we stilll need an extra struct to hold the cmds + size. Overall the
code was introducing more confusion.
However, it works very well for the next patch so I used it there
in v2.
In the future I will look at introducing more of these, they are
interesting helpers.
Thanks,
Dragos
>>>> +
>>>> + list_for_each_entry(dmr, &mr->head, list) {
>>>> + struct mlx5_create_mkey_mem *cmd_mem;
>>>> + int mttlen, mttcount;
>>>> +
>>>> + mttlen = roundup(MLX5_ST_SZ_BYTES(mtt) * dmr->nsg, 16);
>>>
>>> I don't get the roundup here, I guess it is so the driver does not
>>> send potentially uninitialized memory to the device? Maybe the 16
>>> should be a macro?
>>>
>> The roundup is a hw requirement. A define would be a good idea. Will add
>> it.
>>
>>>> + mttcount = mttlen / sizeof(cmd_mem->mtt[0]);
>>>> + cmd_mem = kvcalloc(1, struct_size(cmd_mem, mtt, mttcount), GFP_KERNEL);
>>>> + if (!cmd_mem) {
>>>> + err = -ENOMEM;
>>>> + goto done;
>>>> + }
>>>> +
>>>> + cmds[i].out = cmd_mem->out;
>>>> + cmds[i].outlen = sizeof(cmd_mem->out);
>>>> + cmds[i].in = cmd_mem->in;
>>>> + cmds[i].inlen = struct_size(cmd_mem, mtt, mttcount);
>>>> +
>>>> + fill_create_direct_mr(mvdev, dmr, cmd_mem);
>>>> +
>>>> + i++;
>>>> + }
>>>> +
>>>> + err = mlx5_vdpa_exec_async_cmds(mvdev, cmds, mr->num_directs);
>>>> + if (err) {
>>>> +
>>>> + mlx5_vdpa_err(mvdev, "error issuing MTT mkey creation for direct mrs: %d\n", err);
>>>> + goto done;
>>>> + }
>>>> +
>>>> + i = 0;
>>>> + list_for_each_entry(dmr, &mr->head, list) {
>>>> + struct mlx5_vdpa_async_cmd *cmd = &cmds[i++];
>>>> + struct mlx5_create_mkey_mem *cmd_mem;
>>>> +
>>>> + cmd_mem = container_of(cmd->out, struct mlx5_create_mkey_mem, out);
>>>> +
>>>> + if (!cmd->err) {
>>>> + create_direct_mr_end(mvdev, dmr, cmd_mem);
>>>
>>> The caller function doesn't trust the result if we return an error.
>>> Why not use the previous loop to call create_direct_mr_end? Am I
>>> missing any side effect?
>>>
>> Which previous loop? We have the mkey value only after the command has
>> been executed.
>
> Ok, now I see what I proposed didn't make sense, thanks!
>
>> I added the if here (instead of always calling
>> create_direct_mr_end()) just to make things more explicit for the
>> reader.
>>
>>>> + } else {
>>>> + err = err ? err : cmd->err;
>>>> + mlx5_vdpa_err(mvdev, "error creating MTT mkey [0x%llx, 0x%llx]: %d\n",
>>>> + dmr->start, dmr->end, cmd->err);
>>>> + }
>>>> + }
>>>> +
>>>> +done:
>>>> + for (i = i-1; i >= 0; i--) {
>>>> + struct mlx5_create_mkey_mem *cmd_mem;
>>>> +
>>>> + cmd_mem = container_of(cmds[i].out, struct mlx5_create_mkey_mem, out);
>>>> + kvfree(cmd_mem);
>>>> + }
>>>> +
>>>> + kvfree(cmds);
>>>> + return err;
>>>> +}
>>>> +
>>>> static int create_indirect_key(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
>>>> {
>>>> int inlen;
>>>> @@ -279,14 +355,8 @@ static int map_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr
>>>> goto err_map;
>>>> }
>>>>
>>>> - err = create_direct_mr(mvdev, mr);
>>>> - if (err)
>>>> - goto err_direct;
>>>> -
>>>> return 0;
>>>>
>>>> -err_direct:
>>>> - dma_unmap_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0);
>>>> err_map:
>>>> sg_free_table(&mr->sg_head);
>>>> return err;
>>>> @@ -401,6 +471,10 @@ static int create_user_mr(struct mlx5_vdpa_dev *mvdev,
>>>> if (err)
>>>> goto err_chain;
>>>>
>>>> + err = create_direct_keys(mvdev, mr);
>>>> + if (err)
>>>> + goto err_chain;
>>>> +
>>>> /* Create the memory key that defines the guests's address space. This
>>>> * memory key refers to the direct keys that contain the MTT
>>>> * translations
>>>> --
>>>> 2.45.1
>>>>
>>>
>>
>
Powered by blists - more mailing lists