lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAJaqyWcW2r9bos3CPxE4yZfZDORzki1SKSyODSgJRLXocOam1w@mail.gmail.com>
Date: Thu, 29 Aug 2024 17:15:00 +0200
From: Eugenio Perez Martin <eperezma@...hat.com>
To: Dragos Tatulea <dtatulea@...dia.com>
Cc: Jason Wang <jasowang@...hat.com>, Si-Wei Liu <si-wei.liu@...cle.com>, 
	virtualization <virtualization@...ts.linux-foundation.org>, Gal Pressman <gal@...dia.com>, 
	kvm list <kvm@...r.kernel.org>, linux-kernel <linux-kernel@...r.kernel.org>, 
	Parav Pandit <parav@...dia.com>, Xuan Zhuo <xuanzhuo@...ux.alibaba.com>, 
	Cosmin Ratiu <cratiu@...dia.com>, Michael Tsirkin <mst@...hat.com>
Subject: Re: [PATCH vhost 1/7] vdpa/mlx5: Create direct MKEYs in parallel

On Thu, Aug 29, 2024 at 3:54 PM Dragos Tatulea <dtatulea@...dia.com> wrote:
>
>
>
> On 29.08.24 15:10, Eugenio Perez Martin wrote:
> > On Wed, Aug 21, 2024 at 1:41 PM Dragos Tatulea <dtatulea@...dia.com> wrote:
> >>
> >> Use the async interface to issue MTT MKEY creation.
> >> Extra care is taken at the allocation of FW input commands
> >> due to the MTT tables having variable sizes depending on
> >> MR.
> >>
> >> The indirect MKEY is still created synchronously at the
> >> end as the direct MKEYs need to be filled in.
> >>
> >> This makes create_user_mr() 3-5x faster, depending on
> >> the size of the MR.
> >>
> >> Signed-off-by: Dragos Tatulea <dtatulea@...dia.com>
> >> Reviewed-by: Cosmin Ratiu <cratiu@...dia.com>
> >> ---
> >>  drivers/vdpa/mlx5/core/mr.c | 118 +++++++++++++++++++++++++++++-------
> >>  1 file changed, 96 insertions(+), 22 deletions(-)
> >>
> >> diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c
> >> index 4758914ccf86..66e6a15f823f 100644
> >> --- a/drivers/vdpa/mlx5/core/mr.c
> >> +++ b/drivers/vdpa/mlx5/core/mr.c
> >> @@ -49,17 +49,18 @@ static void populate_mtts(struct mlx5_vdpa_direct_mr *mr, __be64 *mtt)
> >>         }
> >>  }
> >>
> >> -static int create_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr)
> >> +struct mlx5_create_mkey_mem {
> >> +       u8 out[MLX5_ST_SZ_BYTES(create_mkey_out)];
> >> +       u8 in[MLX5_ST_SZ_BYTES(create_mkey_in)];
> >> +       DECLARE_FLEX_ARRAY(__be64, mtt);
> >
> > I may be missing something obvious, but why do we need
> > DECLARE_FLEX_ARRAY here? My understanding is that it is only needed in
> > special cases like uapi headers and we can use "__be64 mtt[]" here.
> >
> checkpatch.pl was complaining about it because in my initial version I
> used the "[0]" version of zero length based arrays.
>
> My impression was that DECLARE_FLEX_ARRAY is preferred option because it
> triggers a compiler error if the zero lenth array is not at the end of
> the struct. But on closer inspection I see that using the right C99
> empty brackets notation is enough to trigger this error.
> DECLARE_FLEX_ARRAY seems to be useful for the union case.
>
> I will change it in a v2.
>
> >> +};
> >> +
> >> +static void fill_create_direct_mr(struct mlx5_vdpa_dev *mvdev,
> >> +                                 struct mlx5_vdpa_direct_mr *mr,
> >> +                                 struct mlx5_create_mkey_mem *mem)
> >>  {
> >> -       int inlen;
> >> +       void *in = &mem->in;
> >>         void *mkc;
> >> -       void *in;
> >> -       int err;
> >> -
> >> -       inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + roundup(MLX5_ST_SZ_BYTES(mtt) * mr->nsg, 16);
> >> -       in = kvzalloc(inlen, GFP_KERNEL);
> >> -       if (!in)
> >> -               return -ENOMEM;
> >>
> >>         MLX5_SET(create_mkey_in, in, uid, mvdev->res.uid);
> >>         mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
> >> @@ -76,18 +77,25 @@ static int create_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct
> >>         MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
> >>                  get_octo_len(mr->end - mr->start, mr->log_size));
> >>         populate_mtts(mr, MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt));
> >> -       err = mlx5_vdpa_create_mkey(mvdev, &mr->mr, in, inlen);
> >> -       kvfree(in);
> >> -       if (err) {
> >> -               mlx5_vdpa_warn(mvdev, "Failed to create direct MR\n");
> >> -               return err;
> >> -       }
> >>
> >> -       return 0;
> >> +       MLX5_SET(create_mkey_in, in, opcode, MLX5_CMD_OP_CREATE_MKEY);
> >> +       MLX5_SET(create_mkey_in, in, uid, mvdev->res.uid);
> >> +}
> >> +
> >> +static void create_direct_mr_end(struct mlx5_vdpa_dev *mvdev,
> >> +                                struct mlx5_vdpa_direct_mr *mr,
> >> +                                struct mlx5_create_mkey_mem *mem)
> >> +{
> >> +       u32 mkey_index = MLX5_GET(create_mkey_out, mem->out, mkey_index);
> >> +
> >> +       mr->mr = mlx5_idx_to_mkey(mkey_index);
> >>  }
> >>
> >>  static void destroy_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr)
> >>  {
> >> +       if (!mr->mr)
> >> +               return;
> >> +
> >>         mlx5_vdpa_destroy_mkey(mvdev, mr->mr);
> >>  }
> >>
> >> @@ -179,6 +187,74 @@ static int klm_byte_size(int nklms)
> >>         return 16 * ALIGN(nklms, 4);
> >>  }
> >>
> >> +static int create_direct_keys(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
> >> +{
> >> +       struct mlx5_vdpa_async_cmd *cmds = NULL;
> >> +       struct mlx5_vdpa_direct_mr *dmr;
> >> +       int err = 0;
> >> +       int i = 0;
> >> +
> >> +       cmds = kvcalloc(mr->num_directs, sizeof(*cmds), GFP_KERNEL);
> >> +       if (!cmds)
> >> +               return -ENOMEM;
> >
> > Nit: this could benefit from Scope-based Cleanup Helpers [1], as it
> > would make clear that memory is always removed at the end of the
> > function & could prevent errors if the function is modified in the
> > future. I'm a big fan of them so my opinion may be biased though :).
> >
> > It would be great to be able to remove the array members with that
> > too, but I think the kernel doesn't have any facility for that.
> >
> I didn't know about those. It sounds like a good idea! I will try
> to use them in v2.
>
> >> +
> >> +       list_for_each_entry(dmr, &mr->head, list) {
> >> +               struct mlx5_create_mkey_mem *cmd_mem;
> >> +               int mttlen, mttcount;
> >> +
> >> +               mttlen = roundup(MLX5_ST_SZ_BYTES(mtt) * dmr->nsg, 16);
> >
> > I don't get the roundup here, I guess it is so the driver does not
> > send potentially uninitialized memory to the device? Maybe the 16
> > should be a macro?
> >
> The roundup is a hw requirement. A define would be a good idea. Will add
> it.
>
> >> +               mttcount = mttlen / sizeof(cmd_mem->mtt[0]);
> >> +               cmd_mem = kvcalloc(1, struct_size(cmd_mem, mtt, mttcount), GFP_KERNEL);
> >> +               if (!cmd_mem) {
> >> +                       err = -ENOMEM;
> >> +                       goto done;
> >> +               }
> >> +
> >> +               cmds[i].out = cmd_mem->out;
> >> +               cmds[i].outlen = sizeof(cmd_mem->out);
> >> +               cmds[i].in = cmd_mem->in;
> >> +               cmds[i].inlen = struct_size(cmd_mem, mtt, mttcount);
> >> +
> >> +               fill_create_direct_mr(mvdev, dmr, cmd_mem);
> >> +
> >> +               i++;
> >> +       }
> >> +
> >> +       err = mlx5_vdpa_exec_async_cmds(mvdev, cmds, mr->num_directs);
> >> +       if (err) {
> >> +
> >> +               mlx5_vdpa_err(mvdev, "error issuing MTT mkey creation for direct mrs: %d\n", err);
> >> +               goto done;
> >> +       }
> >> +
> >> +       i = 0;
> >> +       list_for_each_entry(dmr, &mr->head, list) {
> >> +               struct mlx5_vdpa_async_cmd *cmd = &cmds[i++];
> >> +               struct mlx5_create_mkey_mem *cmd_mem;
> >> +
> >> +               cmd_mem = container_of(cmd->out, struct mlx5_create_mkey_mem, out);
> >> +
> >> +               if (!cmd->err) {
> >> +                       create_direct_mr_end(mvdev, dmr, cmd_mem);
> >
> > The caller function doesn't trust the result if we return an error.
> > Why not use the previous loop to call create_direct_mr_end? Am I
> > missing any side effect?
> >
> Which previous loop? We have the mkey value only after the command has
> been executed.

Ok, now I see what I proposed didn't make sense, thanks!

> I added the if here (instead of always calling
> create_direct_mr_end()) just to make things more explicit for the
> reader.
>
> >> +               } else {
> >> +                       err = err ? err : cmd->err;
> >> +                       mlx5_vdpa_err(mvdev, "error creating MTT mkey [0x%llx, 0x%llx]: %d\n",
> >> +                               dmr->start, dmr->end, cmd->err);
> >> +               }
> >> +       }
> >> +
> >> +done:
> >> +       for (i = i-1; i >= 0; i--) {
> >> +               struct mlx5_create_mkey_mem *cmd_mem;
> >> +
> >> +               cmd_mem = container_of(cmds[i].out, struct mlx5_create_mkey_mem, out);
> >> +               kvfree(cmd_mem);
> >> +       }
> >> +
> >> +       kvfree(cmds);
> >> +       return err;
> >> +}
> >> +
> >>  static int create_indirect_key(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
> >>  {
> >>         int inlen;
> >> @@ -279,14 +355,8 @@ static int map_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr
> >>                 goto err_map;
> >>         }
> >>
> >> -       err = create_direct_mr(mvdev, mr);
> >> -       if (err)
> >> -               goto err_direct;
> >> -
> >>         return 0;
> >>
> >> -err_direct:
> >> -       dma_unmap_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0);
> >>  err_map:
> >>         sg_free_table(&mr->sg_head);
> >>         return err;
> >> @@ -401,6 +471,10 @@ static int create_user_mr(struct mlx5_vdpa_dev *mvdev,
> >>         if (err)
> >>                 goto err_chain;
> >>
> >> +       err = create_direct_keys(mvdev, mr);
> >> +       if (err)
> >> +               goto err_chain;
> >> +
> >>         /* Create the memory key that defines the guests's address space. This
> >>          * memory key refers to the direct keys that contain the MTT
> >>          * translations
> >> --
> >> 2.45.1
> >>
> >
>


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ