lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAJaqyWcFVLXg=nUdOJOviWA+TDfbqBeEHrVROVC1nYrO8+ZmhA@mail.gmail.com>
Date: Thu, 29 Aug 2024 15:10:03 +0200
From: Eugenio Perez Martin <eperezma@...hat.com>
To: Dragos Tatulea <dtatulea@...dia.com>
Cc: "Michael S . Tsirkin" <mst@...hat.com>, Jason Wang <jasowang@...hat.com>, 
	Si-Wei Liu <si-wei.liu@...cle.com>, virtualization@...ts.linux-foundation.org, 
	Gal Pressman <gal@...dia.com>, kvm@...r.kernel.org, linux-kernel@...r.kernel.org, 
	Parav Pandit <parav@...dia.com>, Xuan Zhuo <xuanzhuo@...ux.alibaba.com>, 
	Cosmin Ratiu <cratiu@...dia.com>
Subject: Re: [PATCH vhost 1/7] vdpa/mlx5: Create direct MKEYs in parallel

On Wed, Aug 21, 2024 at 1:41 PM Dragos Tatulea <dtatulea@...dia.com> wrote:
>
> Use the async interface to issue MTT MKEY creation.
> Extra care is taken at the allocation of FW input commands
> due to the MTT tables having variable sizes depending on
> MR.
>
> The indirect MKEY is still created synchronously at the
> end as the direct MKEYs need to be filled in.
>
> This makes create_user_mr() 3-5x faster, depending on
> the size of the MR.
>
> Signed-off-by: Dragos Tatulea <dtatulea@...dia.com>
> Reviewed-by: Cosmin Ratiu <cratiu@...dia.com>
> ---
>  drivers/vdpa/mlx5/core/mr.c | 118 +++++++++++++++++++++++++++++-------
>  1 file changed, 96 insertions(+), 22 deletions(-)
>
> diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c
> index 4758914ccf86..66e6a15f823f 100644
> --- a/drivers/vdpa/mlx5/core/mr.c
> +++ b/drivers/vdpa/mlx5/core/mr.c
> @@ -49,17 +49,18 @@ static void populate_mtts(struct mlx5_vdpa_direct_mr *mr, __be64 *mtt)
>         }
>  }
>
> -static int create_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr)
> +struct mlx5_create_mkey_mem {
> +       u8 out[MLX5_ST_SZ_BYTES(create_mkey_out)];
> +       u8 in[MLX5_ST_SZ_BYTES(create_mkey_in)];
> +       DECLARE_FLEX_ARRAY(__be64, mtt);

I may be missing something obvious, but why do we need
DECLARE_FLEX_ARRAY here? My understanding is that it is only needed in
special cases like uapi headers and we can use "__be64 mtt[]" here.

> +};
> +
> +static void fill_create_direct_mr(struct mlx5_vdpa_dev *mvdev,
> +                                 struct mlx5_vdpa_direct_mr *mr,
> +                                 struct mlx5_create_mkey_mem *mem)
>  {
> -       int inlen;
> +       void *in = &mem->in;
>         void *mkc;
> -       void *in;
> -       int err;
> -
> -       inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + roundup(MLX5_ST_SZ_BYTES(mtt) * mr->nsg, 16);
> -       in = kvzalloc(inlen, GFP_KERNEL);
> -       if (!in)
> -               return -ENOMEM;
>
>         MLX5_SET(create_mkey_in, in, uid, mvdev->res.uid);
>         mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
> @@ -76,18 +77,25 @@ static int create_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct
>         MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
>                  get_octo_len(mr->end - mr->start, mr->log_size));
>         populate_mtts(mr, MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt));
> -       err = mlx5_vdpa_create_mkey(mvdev, &mr->mr, in, inlen);
> -       kvfree(in);
> -       if (err) {
> -               mlx5_vdpa_warn(mvdev, "Failed to create direct MR\n");
> -               return err;
> -       }
>
> -       return 0;
> +       MLX5_SET(create_mkey_in, in, opcode, MLX5_CMD_OP_CREATE_MKEY);
> +       MLX5_SET(create_mkey_in, in, uid, mvdev->res.uid);
> +}
> +
> +static void create_direct_mr_end(struct mlx5_vdpa_dev *mvdev,
> +                                struct mlx5_vdpa_direct_mr *mr,
> +                                struct mlx5_create_mkey_mem *mem)
> +{
> +       u32 mkey_index = MLX5_GET(create_mkey_out, mem->out, mkey_index);
> +
> +       mr->mr = mlx5_idx_to_mkey(mkey_index);
>  }
>
>  static void destroy_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr)
>  {
> +       if (!mr->mr)
> +               return;
> +
>         mlx5_vdpa_destroy_mkey(mvdev, mr->mr);
>  }
>
> @@ -179,6 +187,74 @@ static int klm_byte_size(int nklms)
>         return 16 * ALIGN(nklms, 4);
>  }
>
> +static int create_direct_keys(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
> +{
> +       struct mlx5_vdpa_async_cmd *cmds = NULL;
> +       struct mlx5_vdpa_direct_mr *dmr;
> +       int err = 0;
> +       int i = 0;
> +
> +       cmds = kvcalloc(mr->num_directs, sizeof(*cmds), GFP_KERNEL);
> +       if (!cmds)
> +               return -ENOMEM;

Nit: this could benefit from Scope-based Cleanup Helpers [1], as it
would make clear that memory is always removed at the end of the
function & could prevent errors if the function is modified in the
future. I'm a big fan of them so my opinion may be biased though :).

It would be great to be able to remove the array members with that
too, but I think the kernel doesn't have any facility for that.

> +
> +       list_for_each_entry(dmr, &mr->head, list) {
> +               struct mlx5_create_mkey_mem *cmd_mem;
> +               int mttlen, mttcount;
> +
> +               mttlen = roundup(MLX5_ST_SZ_BYTES(mtt) * dmr->nsg, 16);

I don't get the roundup here, I guess it is so the driver does not
send potentially uninitialized memory to the device? Maybe the 16
should be a macro?

> +               mttcount = mttlen / sizeof(cmd_mem->mtt[0]);
> +               cmd_mem = kvcalloc(1, struct_size(cmd_mem, mtt, mttcount), GFP_KERNEL);
> +               if (!cmd_mem) {
> +                       err = -ENOMEM;
> +                       goto done;
> +               }
> +
> +               cmds[i].out = cmd_mem->out;
> +               cmds[i].outlen = sizeof(cmd_mem->out);
> +               cmds[i].in = cmd_mem->in;
> +               cmds[i].inlen = struct_size(cmd_mem, mtt, mttcount);
> +
> +               fill_create_direct_mr(mvdev, dmr, cmd_mem);
> +
> +               i++;
> +       }
> +
> +       err = mlx5_vdpa_exec_async_cmds(mvdev, cmds, mr->num_directs);
> +       if (err) {
> +
> +               mlx5_vdpa_err(mvdev, "error issuing MTT mkey creation for direct mrs: %d\n", err);
> +               goto done;
> +       }
> +
> +       i = 0;
> +       list_for_each_entry(dmr, &mr->head, list) {
> +               struct mlx5_vdpa_async_cmd *cmd = &cmds[i++];
> +               struct mlx5_create_mkey_mem *cmd_mem;
> +
> +               cmd_mem = container_of(cmd->out, struct mlx5_create_mkey_mem, out);
> +
> +               if (!cmd->err) {
> +                       create_direct_mr_end(mvdev, dmr, cmd_mem);

The caller function doesn't trust the result if we return an error.
Why not use the previous loop to call create_direct_mr_end? Am I
missing any side effect?

> +               } else {
> +                       err = err ? err : cmd->err;
> +                       mlx5_vdpa_err(mvdev, "error creating MTT mkey [0x%llx, 0x%llx]: %d\n",
> +                               dmr->start, dmr->end, cmd->err);
> +               }
> +       }
> +
> +done:
> +       for (i = i-1; i >= 0; i--) {
> +               struct mlx5_create_mkey_mem *cmd_mem;
> +
> +               cmd_mem = container_of(cmds[i].out, struct mlx5_create_mkey_mem, out);
> +               kvfree(cmd_mem);
> +       }
> +
> +       kvfree(cmds);
> +       return err;
> +}
> +
>  static int create_indirect_key(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
>  {
>         int inlen;
> @@ -279,14 +355,8 @@ static int map_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr
>                 goto err_map;
>         }
>
> -       err = create_direct_mr(mvdev, mr);
> -       if (err)
> -               goto err_direct;
> -
>         return 0;
>
> -err_direct:
> -       dma_unmap_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0);
>  err_map:
>         sg_free_table(&mr->sg_head);
>         return err;
> @@ -401,6 +471,10 @@ static int create_user_mr(struct mlx5_vdpa_dev *mvdev,
>         if (err)
>                 goto err_chain;
>
> +       err = create_direct_keys(mvdev, mr);
> +       if (err)
> +               goto err_chain;
> +
>         /* Create the memory key that defines the guests's address space. This
>          * memory key refers to the direct keys that contain the MTT
>          * translations
> --
> 2.45.1
>


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ