[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAJaqyWcW2r9bos3CPxE4yZfZDORzki1SKSyODSgJRLXocOam1w@mail.gmail.com>
Date: Thu, 29 Aug 2024 17:15:00 +0200
From: Eugenio Perez Martin <eperezma@...hat.com>
To: Dragos Tatulea <dtatulea@...dia.com>
Cc: Jason Wang <jasowang@...hat.com>, Si-Wei Liu <si-wei.liu@...cle.com>,
virtualization <virtualization@...ts.linux-foundation.org>, Gal Pressman <gal@...dia.com>,
kvm list <kvm@...r.kernel.org>, linux-kernel <linux-kernel@...r.kernel.org>,
Parav Pandit <parav@...dia.com>, Xuan Zhuo <xuanzhuo@...ux.alibaba.com>,
Cosmin Ratiu <cratiu@...dia.com>, Michael Tsirkin <mst@...hat.com>
Subject: Re: [PATCH vhost 1/7] vdpa/mlx5: Create direct MKEYs in parallel
On Thu, Aug 29, 2024 at 3:54 PM Dragos Tatulea <dtatulea@...dia.com> wrote:
>
>
>
> On 29.08.24 15:10, Eugenio Perez Martin wrote:
> > On Wed, Aug 21, 2024 at 1:41 PM Dragos Tatulea <dtatulea@...dia.com> wrote:
> >>
> >> Use the async interface to issue MTT MKEY creation.
> >> Extra care is taken at the allocation of FW input commands
> >> due to the MTT tables having variable sizes depending on
> >> MR.
> >>
> >> The indirect MKEY is still created synchronously at the
> >> end as the direct MKEYs need to be filled in.
> >>
> >> This makes create_user_mr() 3-5x faster, depending on
> >> the size of the MR.
> >>
> >> Signed-off-by: Dragos Tatulea <dtatulea@...dia.com>
> >> Reviewed-by: Cosmin Ratiu <cratiu@...dia.com>
> >> ---
> >> drivers/vdpa/mlx5/core/mr.c | 118 +++++++++++++++++++++++++++++-------
> >> 1 file changed, 96 insertions(+), 22 deletions(-)
> >>
> >> diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c
> >> index 4758914ccf86..66e6a15f823f 100644
> >> --- a/drivers/vdpa/mlx5/core/mr.c
> >> +++ b/drivers/vdpa/mlx5/core/mr.c
> >> @@ -49,17 +49,18 @@ static void populate_mtts(struct mlx5_vdpa_direct_mr *mr, __be64 *mtt)
> >> }
> >> }
> >>
> >> -static int create_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr)
> >> +struct mlx5_create_mkey_mem {
> >> + u8 out[MLX5_ST_SZ_BYTES(create_mkey_out)];
> >> + u8 in[MLX5_ST_SZ_BYTES(create_mkey_in)];
> >> + DECLARE_FLEX_ARRAY(__be64, mtt);
> >
> > I may be missing something obvious, but why do we need
> > DECLARE_FLEX_ARRAY here? My understanding is that it is only needed in
> > special cases like uapi headers and we can use "__be64 mtt[]" here.
> >
> checkpatch.pl was complaining about it because in my initial version I
> used the "[0]" version of zero length based arrays.
>
> My impression was that DECLARE_FLEX_ARRAY is preferred option because it
> triggers a compiler error if the zero lenth array is not at the end of
> the struct. But on closer inspection I see that using the right C99
> empty brackets notation is enough to trigger this error.
> DECLARE_FLEX_ARRAY seems to be useful for the union case.
>
> I will change it in a v2.
>
> >> +};
> >> +
> >> +static void fill_create_direct_mr(struct mlx5_vdpa_dev *mvdev,
> >> + struct mlx5_vdpa_direct_mr *mr,
> >> + struct mlx5_create_mkey_mem *mem)
> >> {
> >> - int inlen;
> >> + void *in = &mem->in;
> >> void *mkc;
> >> - void *in;
> >> - int err;
> >> -
> >> - inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + roundup(MLX5_ST_SZ_BYTES(mtt) * mr->nsg, 16);
> >> - in = kvzalloc(inlen, GFP_KERNEL);
> >> - if (!in)
> >> - return -ENOMEM;
> >>
> >> MLX5_SET(create_mkey_in, in, uid, mvdev->res.uid);
> >> mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
> >> @@ -76,18 +77,25 @@ static int create_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct
> >> MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
> >> get_octo_len(mr->end - mr->start, mr->log_size));
> >> populate_mtts(mr, MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt));
> >> - err = mlx5_vdpa_create_mkey(mvdev, &mr->mr, in, inlen);
> >> - kvfree(in);
> >> - if (err) {
> >> - mlx5_vdpa_warn(mvdev, "Failed to create direct MR\n");
> >> - return err;
> >> - }
> >>
> >> - return 0;
> >> + MLX5_SET(create_mkey_in, in, opcode, MLX5_CMD_OP_CREATE_MKEY);
> >> + MLX5_SET(create_mkey_in, in, uid, mvdev->res.uid);
> >> +}
> >> +
> >> +static void create_direct_mr_end(struct mlx5_vdpa_dev *mvdev,
> >> + struct mlx5_vdpa_direct_mr *mr,
> >> + struct mlx5_create_mkey_mem *mem)
> >> +{
> >> + u32 mkey_index = MLX5_GET(create_mkey_out, mem->out, mkey_index);
> >> +
> >> + mr->mr = mlx5_idx_to_mkey(mkey_index);
> >> }
> >>
> >> static void destroy_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr)
> >> {
> >> + if (!mr->mr)
> >> + return;
> >> +
> >> mlx5_vdpa_destroy_mkey(mvdev, mr->mr);
> >> }
> >>
> >> @@ -179,6 +187,74 @@ static int klm_byte_size(int nklms)
> >> return 16 * ALIGN(nklms, 4);
> >> }
> >>
> >> +static int create_direct_keys(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
> >> +{
> >> + struct mlx5_vdpa_async_cmd *cmds = NULL;
> >> + struct mlx5_vdpa_direct_mr *dmr;
> >> + int err = 0;
> >> + int i = 0;
> >> +
> >> + cmds = kvcalloc(mr->num_directs, sizeof(*cmds), GFP_KERNEL);
> >> + if (!cmds)
> >> + return -ENOMEM;
> >
> > Nit: this could benefit from Scope-based Cleanup Helpers [1], as it
> > would make clear that memory is always removed at the end of the
> > function & could prevent errors if the function is modified in the
> > future. I'm a big fan of them so my opinion may be biased though :).
> >
> > It would be great to be able to remove the array members with that
> > too, but I think the kernel doesn't have any facility for that.
> >
> I didn't know about those. It sounds like a good idea! I will try
> to use them in v2.
>
> >> +
> >> + list_for_each_entry(dmr, &mr->head, list) {
> >> + struct mlx5_create_mkey_mem *cmd_mem;
> >> + int mttlen, mttcount;
> >> +
> >> + mttlen = roundup(MLX5_ST_SZ_BYTES(mtt) * dmr->nsg, 16);
> >
> > I don't get the roundup here, I guess it is so the driver does not
> > send potentially uninitialized memory to the device? Maybe the 16
> > should be a macro?
> >
> The roundup is a hw requirement. A define would be a good idea. Will add
> it.
>
> >> + mttcount = mttlen / sizeof(cmd_mem->mtt[0]);
> >> + cmd_mem = kvcalloc(1, struct_size(cmd_mem, mtt, mttcount), GFP_KERNEL);
> >> + if (!cmd_mem) {
> >> + err = -ENOMEM;
> >> + goto done;
> >> + }
> >> +
> >> + cmds[i].out = cmd_mem->out;
> >> + cmds[i].outlen = sizeof(cmd_mem->out);
> >> + cmds[i].in = cmd_mem->in;
> >> + cmds[i].inlen = struct_size(cmd_mem, mtt, mttcount);
> >> +
> >> + fill_create_direct_mr(mvdev, dmr, cmd_mem);
> >> +
> >> + i++;
> >> + }
> >> +
> >> + err = mlx5_vdpa_exec_async_cmds(mvdev, cmds, mr->num_directs);
> >> + if (err) {
> >> +
> >> + mlx5_vdpa_err(mvdev, "error issuing MTT mkey creation for direct mrs: %d\n", err);
> >> + goto done;
> >> + }
> >> +
> >> + i = 0;
> >> + list_for_each_entry(dmr, &mr->head, list) {
> >> + struct mlx5_vdpa_async_cmd *cmd = &cmds[i++];
> >> + struct mlx5_create_mkey_mem *cmd_mem;
> >> +
> >> + cmd_mem = container_of(cmd->out, struct mlx5_create_mkey_mem, out);
> >> +
> >> + if (!cmd->err) {
> >> + create_direct_mr_end(mvdev, dmr, cmd_mem);
> >
> > The caller function doesn't trust the result if we return an error.
> > Why not use the previous loop to call create_direct_mr_end? Am I
> > missing any side effect?
> >
> Which previous loop? We have the mkey value only after the command has
> been executed.
Ok, now I see what I proposed didn't make sense, thanks!
> I added the if here (instead of always calling
> create_direct_mr_end()) just to make things more explicit for the
> reader.
>
> >> + } else {
> >> + err = err ? err : cmd->err;
> >> + mlx5_vdpa_err(mvdev, "error creating MTT mkey [0x%llx, 0x%llx]: %d\n",
> >> + dmr->start, dmr->end, cmd->err);
> >> + }
> >> + }
> >> +
> >> +done:
> >> + for (i = i-1; i >= 0; i--) {
> >> + struct mlx5_create_mkey_mem *cmd_mem;
> >> +
> >> + cmd_mem = container_of(cmds[i].out, struct mlx5_create_mkey_mem, out);
> >> + kvfree(cmd_mem);
> >> + }
> >> +
> >> + kvfree(cmds);
> >> + return err;
> >> +}
> >> +
> >> static int create_indirect_key(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
> >> {
> >> int inlen;
> >> @@ -279,14 +355,8 @@ static int map_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr
> >> goto err_map;
> >> }
> >>
> >> - err = create_direct_mr(mvdev, mr);
> >> - if (err)
> >> - goto err_direct;
> >> -
> >> return 0;
> >>
> >> -err_direct:
> >> - dma_unmap_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0);
> >> err_map:
> >> sg_free_table(&mr->sg_head);
> >> return err;
> >> @@ -401,6 +471,10 @@ static int create_user_mr(struct mlx5_vdpa_dev *mvdev,
> >> if (err)
> >> goto err_chain;
> >>
> >> + err = create_direct_keys(mvdev, mr);
> >> + if (err)
> >> + goto err_chain;
> >> +
> >> /* Create the memory key that defines the guests's address space. This
> >> * memory key refers to the direct keys that contain the MTT
> >> * translations
> >> --
> >> 2.45.1
> >>
> >
>
Powered by blists - more mailing lists