lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Wed, 14 Dec 2022 14:46:31 +0800
From:   Jason Wang <jasowang@...hat.com>
To:     Gautam Dawar <gautam.dawar@....com>
Cc:     linux-net-drivers@....com, netdev@...r.kernel.org,
        eperezma@...hat.com, tanuj.kamde@....com, Koushik.Dutta@....com,
        harpreet.anand@....com, Edward Cree <ecree.xilinx@...il.com>,
        Martin Habets <habetsm.xilinx@...il.com>,
        "David S. Miller" <davem@...emloft.net>,
        Eric Dumazet <edumazet@...gle.com>,
        Jakub Kicinski <kuba@...nel.org>,
        Paolo Abeni <pabeni@...hat.com>, linux-kernel@...r.kernel.org
Subject: Re: [PATCH net-next 09/11] sfc: implement iova rbtree to store dma mappings

On Wed, Dec 7, 2022 at 10:57 PM Gautam Dawar <gautam.dawar@....com> wrote:
>
> sfc uses a MCDI DMA buffer that is allocated on the host
> for communicating with the Firmware. The MCDI buffer IOVA
> could overlap with the IOVA used by the guest for the
> virtqueue buffers. To detect such overlap, the DMA mappings
> from the guest will be stored in a IOVA rbtree and every
> such mapping will be compared against the MCDI buffer IOVA
> range. If an overlap is detected, the MCDI buffer will be
> relocated to a different IOVA.

I think it can't prevent guests from guessing the MCDI buffer address
and trying to DMA to/from that buffer.

It might work with some co-operation of the NIC who can validate the
DMA initialized by the virtqueue and forbid the DMA to the MDCI
buffer.

Thanks


>
> Signed-off-by: Gautam Dawar <gautam.dawar@....com>
> ---
>  drivers/net/ethernet/sfc/Makefile         |   3 +-
>  drivers/net/ethernet/sfc/ef100_iova.c     | 205 ++++++++++++++++++++++
>  drivers/net/ethernet/sfc/ef100_iova.h     |  40 +++++
>  drivers/net/ethernet/sfc/ef100_nic.c      |   1 -
>  drivers/net/ethernet/sfc/ef100_vdpa.c     |  38 ++++
>  drivers/net/ethernet/sfc/ef100_vdpa.h     |  15 ++
>  drivers/net/ethernet/sfc/ef100_vdpa_ops.c |   5 +
>  drivers/net/ethernet/sfc/mcdi.h           |   3 +
>  8 files changed, 308 insertions(+), 2 deletions(-)
>  create mode 100644 drivers/net/ethernet/sfc/ef100_iova.c
>  create mode 100644 drivers/net/ethernet/sfc/ef100_iova.h
>
> diff --git a/drivers/net/ethernet/sfc/Makefile b/drivers/net/ethernet/sfc/Makefile
> index a10eac91ab23..85852ff50b7c 100644
> --- a/drivers/net/ethernet/sfc/Makefile
> +++ b/drivers/net/ethernet/sfc/Makefile
> @@ -11,7 +11,8 @@ sfc-$(CONFIG_SFC_MTD) += mtd.o
>  sfc-$(CONFIG_SFC_SRIOV)        += sriov.o ef10_sriov.o ef100_sriov.o ef100_rep.o \
>                             mae.o tc.o tc_bindings.o tc_counters.o
>
> -sfc-$(CONFIG_SFC_VDPA) += mcdi_vdpa.o ef100_vdpa.o ef100_vdpa_ops.o
> +sfc-$(CONFIG_SFC_VDPA) += mcdi_vdpa.o ef100_vdpa.o ef100_vdpa_ops.o \
> +                          ef100_iova.o
>  obj-$(CONFIG_SFC)      += sfc.o
>
>  obj-$(CONFIG_SFC_FALCON) += falcon/
> diff --git a/drivers/net/ethernet/sfc/ef100_iova.c b/drivers/net/ethernet/sfc/ef100_iova.c
> new file mode 100644
> index 000000000000..863314c5b9b5
> --- /dev/null
> +++ b/drivers/net/ethernet/sfc/ef100_iova.c
> @@ -0,0 +1,205 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/* Driver for Xilinx network controllers and boards
> + * Copyright(C) 2020-2022 Xilinx, Inc.
> + * Copyright(C) 2022 Advanced Micro Devices, Inc.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms of the GNU General Public License version 2 as published
> + * by the Free Software Foundation, incorporated herein by reference.
> + */
> +
> +#include "ef100_iova.h"
> +
> +static void update_free_list_node(struct ef100_vdpa_iova_node *target_node,
> +                                 struct ef100_vdpa_iova_node *next_node,
> +                                 struct ef100_vdpa_nic *vdpa_nic)
> +{
> +       unsigned long target_node_end;
> +       unsigned long free_area;
> +       bool in_list;
> +
> +       target_node_end = target_node->iova + target_node->size;
> +       free_area = next_node->iova - target_node_end;
> +       in_list = !(list_empty(&target_node->free_node));
> +
> +       if (!in_list && free_area >= MCDI_BUF_LEN) {
> +               list_add(&target_node->free_node,
> +                        &vdpa_nic->free_list);
> +       } else if (in_list && free_area < MCDI_BUF_LEN) {
> +               list_del_init(&target_node->free_node);
> +       }
> +}
> +
> +static void update_free_list(struct ef100_vdpa_iova_node *iova_node,
> +                            struct ef100_vdpa_nic *vdpa_nic,
> +                            bool add_node)
> +{
> +       struct ef100_vdpa_iova_node *prev_in = NULL;
> +       struct ef100_vdpa_iova_node *next_in = NULL;
> +       struct rb_node *prev_node;
> +       struct rb_node *next_node;
> +
> +       prev_node = rb_prev(&iova_node->node);
> +       next_node = rb_next(&iova_node->node);
> +
> +       if (prev_node)
> +               prev_in = rb_entry(prev_node,
> +                                  struct ef100_vdpa_iova_node, node);
> +       if (next_node)
> +               next_in = rb_entry(next_node,
> +                                  struct ef100_vdpa_iova_node, node);
> +
> +       if (add_node) {
> +               if (prev_in)
> +                       update_free_list_node(prev_in, iova_node, vdpa_nic);
> +
> +               if (next_in)
> +                       update_free_list_node(iova_node, next_in, vdpa_nic);
> +       } else {
> +               if (next_in && prev_in)
> +                       update_free_list_node(prev_in, next_in, vdpa_nic);
> +               if (!list_empty(&iova_node->free_node))
> +                       list_del_init(&iova_node->free_node);
> +       }
> +}
> +
> +int efx_ef100_insert_iova_node(struct ef100_vdpa_nic *vdpa_nic,
> +                              u64 iova, u64 size)
> +{
> +       struct ef100_vdpa_iova_node *iova_node;
> +       struct ef100_vdpa_iova_node *new_node;
> +       struct rb_node *parent;
> +       struct rb_node **link;
> +       struct rb_root *root;
> +       int rc = 0;
> +
> +       mutex_lock(&vdpa_nic->iova_lock);
> +
> +       root = &vdpa_nic->iova_root;
> +       link = &root->rb_node;
> +       parent = *link;
> +       /* Go to the bottom of the tree */
> +       while (*link) {
> +               parent = *link;
> +               iova_node = rb_entry(parent, struct ef100_vdpa_iova_node, node);
> +
> +               /* handle duplicate node */
> +               if (iova_node->iova == iova) {
> +                       rc = -EEXIST;
> +                       goto out_unlock;
> +               }
> +
> +               if (iova_node->iova > iova)
> +                       link = &(*link)->rb_left;
> +               else
> +                       link = &(*link)->rb_right;
> +       }
> +
> +       new_node = kzalloc(sizeof(*new_node), GFP_KERNEL);
> +       if (!new_node) {
> +               rc = -ENOMEM;
> +               goto out_unlock;
> +       }
> +
> +       new_node->iova = iova;
> +       new_node->size = size;
> +       INIT_LIST_HEAD(&new_node->free_node);
> +
> +       /* Put the new node here */
> +       rb_link_node(&new_node->node, parent, link);
> +       rb_insert_color(&new_node->node, root);
> +
> +       update_free_list(new_node, vdpa_nic, true);
> +
> +out_unlock:
> +       mutex_unlock(&vdpa_nic->iova_lock);
> +       return rc;
> +}
> +
> +static struct ef100_vdpa_iova_node*
> +ef100_rbt_search_node(struct ef100_vdpa_nic *vdpa_nic,
> +                     unsigned long iova)
> +{
> +       struct ef100_vdpa_iova_node *iova_node;
> +       struct rb_node *rb_node;
> +       struct rb_root *root;
> +
> +       root = &vdpa_nic->iova_root;
> +       if (!root)
> +               return NULL;
> +
> +       rb_node = root->rb_node;
> +
> +       while (rb_node) {
> +               iova_node = rb_entry(rb_node, struct ef100_vdpa_iova_node,
> +                                    node);
> +               if (iova_node->iova > iova)
> +                       rb_node = rb_node->rb_left;
> +               else if (iova_node->iova < iova)
> +                       rb_node = rb_node->rb_right;
> +               else
> +                       return iova_node;
> +       }
> +
> +       return NULL;
> +}
> +
> +void efx_ef100_remove_iova_node(struct ef100_vdpa_nic *vdpa_nic,
> +                               unsigned long iova)
> +{
> +       struct ef100_vdpa_iova_node *iova_node;
> +
> +       mutex_lock(&vdpa_nic->iova_lock);
> +       iova_node = ef100_rbt_search_node(vdpa_nic, iova);
> +       if (!iova_node)
> +               goto out_unlock;
> +
> +       update_free_list(iova_node, vdpa_nic, false);
> +
> +       rb_erase(&iova_node->node, &vdpa_nic->iova_root);
> +       kfree(iova_node);
> +
> +out_unlock:
> +       mutex_unlock(&vdpa_nic->iova_lock);
> +}
> +
> +void efx_ef100_delete_iova(struct ef100_vdpa_nic *vdpa_nic)
> +{
> +       struct ef100_vdpa_iova_node *iova_node;
> +       struct rb_root *iova_root;
> +       struct rb_node *node;
> +
> +       mutex_lock(&vdpa_nic->iova_lock);
> +
> +       iova_root = &vdpa_nic->iova_root;
> +       while (!RB_EMPTY_ROOT(iova_root)) {
> +               node = rb_first(iova_root);
> +               iova_node = rb_entry(node, struct ef100_vdpa_iova_node, node);
> +               if (!list_empty(&iova_node->free_node))
> +                       list_del_init(&iova_node->free_node);
> +               if (vdpa_nic->domain)
> +                       iommu_unmap(vdpa_nic->domain, iova_node->iova,
> +                                   iova_node->size);
> +               rb_erase(node, iova_root);
> +               kfree(iova_node);
> +       }
> +
> +       mutex_unlock(&vdpa_nic->iova_lock);
> +}
> +
> +int efx_ef100_find_new_iova(struct ef100_vdpa_nic *vdpa_nic,
> +                           unsigned int buf_len,
> +                           u64 *new_iova)
> +{
> +       struct ef100_vdpa_iova_node *iova_node;
> +
> +       /* pick the first node from freelist */
> +       iova_node = list_first_entry_or_null(&vdpa_nic->free_list,
> +                                            struct ef100_vdpa_iova_node,
> +                                            free_node);
> +       if (!iova_node)
> +               return -ENOENT;
> +
> +       *new_iova = iova_node->iova + iova_node->size;
> +       return 0;
> +}
> diff --git a/drivers/net/ethernet/sfc/ef100_iova.h b/drivers/net/ethernet/sfc/ef100_iova.h
> new file mode 100644
> index 000000000000..68e39c4152c7
> --- /dev/null
> +++ b/drivers/net/ethernet/sfc/ef100_iova.h
> @@ -0,0 +1,40 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/* Driver for Xilinx network controllers and boards
> + * Copyright(C) 2020-2022 Xilinx, Inc.
> + * Copyright(C) 2022 Advanced Micro Devices, Inc.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms of the GNU General Public License version 2 as published
> + * by the Free Software Foundation, incorporated herein by reference.
> + */
> +#ifndef EFX_EF100_IOVA_H
> +#define EFX_EF100_IOVA_H
> +
> +#include "ef100_nic.h"
> +#include "ef100_vdpa.h"
> +
> +#if defined(CONFIG_SFC_VDPA)
> +/**
> + * struct ef100_vdpa_iova_node - guest buffer iova entry
> + *
> + * @node: red black tree node
> + * @iova: mapping's IO virtual address
> + * @size: length of mapped region in bytes
> + * @free_node: free list node
> + */
> +struct ef100_vdpa_iova_node {
> +       struct rb_node node;
> +       unsigned long iova;
> +       size_t size;
> +       struct list_head free_node;
> +};
> +
> +int efx_ef100_insert_iova_node(struct ef100_vdpa_nic *vdpa_nic,
> +                              u64 iova, u64 size);
> +void efx_ef100_remove_iova_node(struct ef100_vdpa_nic *vdpa_nic,
> +                               unsigned long iova);
> +void efx_ef100_delete_iova(struct ef100_vdpa_nic *vdpa_nic);
> +int efx_ef100_find_new_iova(struct ef100_vdpa_nic *vdpa_nic,
> +                           unsigned int buf_len, u64 *new_iova);
> +#endif  /* CONFIG_SFC_VDPA */
> +#endif /* EFX_EF100_IOVA_H */
> diff --git a/drivers/net/ethernet/sfc/ef100_nic.c b/drivers/net/ethernet/sfc/ef100_nic.c
> index 41811c519275..72820d2fe19d 100644
> --- a/drivers/net/ethernet/sfc/ef100_nic.c
> +++ b/drivers/net/ethernet/sfc/ef100_nic.c
> @@ -33,7 +33,6 @@
>
>  #define EF100_MAX_VIS 4096
>  #define EF100_NUM_MCDI_BUFFERS 1
> -#define MCDI_BUF_LEN (8 + MCDI_CTL_SDU_LEN_MAX)
>
>  #define EF100_RESET_PORT ((ETH_RESET_MAC | ETH_RESET_PHY) << ETH_RESET_SHARED_SHIFT)
>
> diff --git a/drivers/net/ethernet/sfc/ef100_vdpa.c b/drivers/net/ethernet/sfc/ef100_vdpa.c
> index 80bca281a748..b9368eb1acd5 100644
> --- a/drivers/net/ethernet/sfc/ef100_vdpa.c
> +++ b/drivers/net/ethernet/sfc/ef100_vdpa.c
> @@ -14,6 +14,7 @@
>  #include <uapi/linux/vdpa.h>
>  #include "ef100_vdpa.h"
>  #include "mcdi_vdpa.h"
> +#include "ef100_iova.h"
>  #include "mcdi_filters.h"
>  #include "mcdi_functions.h"
>  #include "ef100_netdev.h"
> @@ -280,6 +281,34 @@ static int get_net_config(struct ef100_vdpa_nic *vdpa_nic)
>         return 0;
>  }
>
> +static int vdpa_update_domain(struct ef100_vdpa_nic *vdpa_nic)
> +{
> +       struct vdpa_device *vdpa = &vdpa_nic->vdpa_dev;
> +       struct iommu_domain_geometry *geo;
> +       struct device *dma_dev;
> +
> +       dma_dev = vdpa_get_dma_dev(vdpa);
> +       if (!device_iommu_capable(dma_dev, IOMMU_CAP_CACHE_COHERENCY))
> +               return -EOPNOTSUPP;
> +
> +       vdpa_nic->domain = iommu_get_domain_for_dev(dma_dev);
> +       if (!vdpa_nic->domain)
> +               return -ENODEV;
> +
> +       geo = &vdpa_nic->domain->geometry;
> +       /* save the geo aperture range for validation in dma_map */
> +       vdpa_nic->geo_aper_start = geo->aperture_start;
> +
> +       /* Handle the boundary case */
> +       if (geo->aperture_end == ~0ULL)
> +               geo->aperture_end -= 1;
> +       vdpa_nic->geo_aper_end = geo->aperture_end;
> +
> +       /* insert a sentinel node */
> +       return efx_ef100_insert_iova_node(vdpa_nic,
> +                                         vdpa_nic->geo_aper_end + 1, 0);
> +}
> +
>  static struct ef100_vdpa_nic *ef100_vdpa_create(struct efx_nic *efx,
>                                                 const char *dev_name,
>                                                 enum ef100_vdpa_class dev_type,
> @@ -316,6 +345,7 @@ static struct ef100_vdpa_nic *ef100_vdpa_create(struct efx_nic *efx,
>         }
>
>         mutex_init(&vdpa_nic->lock);
> +       mutex_init(&vdpa_nic->iova_lock);
>         dev = &vdpa_nic->vdpa_dev.dev;
>         efx->vdpa_nic = vdpa_nic;
>         vdpa_nic->vdpa_dev.dma_dev = &efx->pci_dev->dev;
> @@ -325,9 +355,11 @@ static struct ef100_vdpa_nic *ef100_vdpa_create(struct efx_nic *efx,
>         vdpa_nic->pf_index = nic_data->pf_index;
>         vdpa_nic->vf_index = nic_data->vf_index;
>         vdpa_nic->vdpa_state = EF100_VDPA_STATE_INITIALIZED;
> +       vdpa_nic->iova_root = RB_ROOT;
>         vdpa_nic->mac_address = (u8 *)&vdpa_nic->net_config.mac;
>         ether_addr_copy(vdpa_nic->mac_address, mac);
>         vdpa_nic->mac_configured = true;
> +       INIT_LIST_HEAD(&vdpa_nic->free_list);
>
>         for (i = 0; i < EF100_VDPA_MAC_FILTER_NTYPES; i++)
>                 vdpa_nic->filters[i].filter_id = EFX_INVALID_FILTER_ID;
> @@ -353,6 +385,12 @@ static struct ef100_vdpa_nic *ef100_vdpa_create(struct efx_nic *efx,
>                 goto err_put_device;
>         }
>
> +       rc = vdpa_update_domain(vdpa_nic);
> +       if (rc) {
> +               pci_err(efx->pci_dev, "update_domain failed, err: %d\n", rc);
> +               goto err_put_device;
> +       }
> +
>         rc = get_net_config(vdpa_nic);
>         if (rc)
>                 goto err_put_device;
> diff --git a/drivers/net/ethernet/sfc/ef100_vdpa.h b/drivers/net/ethernet/sfc/ef100_vdpa.h
> index 1b0bbba88154..c3c77029973d 100644
> --- a/drivers/net/ethernet/sfc/ef100_vdpa.h
> +++ b/drivers/net/ethernet/sfc/ef100_vdpa.h
> @@ -12,7 +12,9 @@
>  #define __EF100_VDPA_H__
>
>  #include <linux/vdpa.h>
> +#include <linux/iommu.h>
>  #include <uapi/linux/virtio_net.h>
> +#include <linux/rbtree.h>
>  #include "net_driver.h"
>  #include "ef100_nic.h"
>
> @@ -155,6 +157,12 @@ struct ef100_vdpa_filter {
>   * @mac_configured: true after MAC address is configured
>   * @filters: details of all filters created on this vdpa device
>   * @cfg_cb: callback for config change
> + * @domain: IOMMU domain
> + * @iova_root: iova rbtree root
> + * @iova_lock: lock to synchronize updates to rbtree and freelist
> + * @free_list: list to store free iova areas of size >= MCDI buffer length
> + * @geo_aper_start: start of valid IOVA range
> + * @geo_aper_end: end of valid IOVA range
>   */
>  struct ef100_vdpa_nic {
>         struct vdpa_device vdpa_dev;
> @@ -174,6 +182,13 @@ struct ef100_vdpa_nic {
>         bool mac_configured;
>         struct ef100_vdpa_filter filters[EF100_VDPA_MAC_FILTER_NTYPES];
>         struct vdpa_callback cfg_cb;
> +       struct iommu_domain *domain;
> +       struct rb_root iova_root;
> +       /* mutex to synchronize rbtree operations */
> +       struct mutex iova_lock;
> +       struct list_head free_list;
> +       u64 geo_aper_start;
> +       u64 geo_aper_end;
>  };
>
>  int ef100_vdpa_init(struct efx_probe_data *probe_data);
> diff --git a/drivers/net/ethernet/sfc/ef100_vdpa_ops.c b/drivers/net/ethernet/sfc/ef100_vdpa_ops.c
> index 718b67f6da90..8c198d949fdb 100644
> --- a/drivers/net/ethernet/sfc/ef100_vdpa_ops.c
> +++ b/drivers/net/ethernet/sfc/ef100_vdpa_ops.c
> @@ -10,6 +10,7 @@
>
>  #include <linux/vdpa.h>
>  #include "ef100_vdpa.h"
> +#include "ef100_iova.h"
>  #include "io.h"
>  #include "mcdi_vdpa.h"
>
> @@ -260,6 +261,7 @@ static void ef100_reset_vdpa_device(struct ef100_vdpa_nic *vdpa_nic)
>         if (!vdpa_nic->status)
>                 return;
>
> +       efx_ef100_delete_iova(vdpa_nic);
>         vdpa_nic->vdpa_state = EF100_VDPA_STATE_INITIALIZED;
>         vdpa_nic->status = 0;
>         vdpa_nic->features = 0;
> @@ -743,9 +745,12 @@ static void ef100_vdpa_free(struct vdpa_device *vdev)
>         int i;
>
>         if (vdpa_nic) {
> +               /* clean-up the mappings and iova tree */
> +               efx_ef100_delete_iova(vdpa_nic);
>                 for (i = 0; i < (vdpa_nic->max_queue_pairs * 2); i++)
>                         reset_vring(vdpa_nic, i);
>                 ef100_vdpa_irq_vectors_free(vdpa_nic->efx->pci_dev);
> +               mutex_destroy(&vdpa_nic->iova_lock);
>                 mutex_destroy(&vdpa_nic->lock);
>                 vdpa_nic->efx->vdpa_nic = NULL;
>         }
> diff --git a/drivers/net/ethernet/sfc/mcdi.h b/drivers/net/ethernet/sfc/mcdi.h
> index db4ca4975ada..7d977a58a0df 100644
> --- a/drivers/net/ethernet/sfc/mcdi.h
> +++ b/drivers/net/ethernet/sfc/mcdi.h
> @@ -7,6 +7,7 @@
>  #ifndef EFX_MCDI_H
>  #define EFX_MCDI_H
>
> +#include "mcdi_pcol.h"
>  /**
>   * enum efx_mcdi_state - MCDI request handling state
>   * @MCDI_STATE_QUIESCENT: No pending MCDI requests. If the caller holds the
> @@ -40,6 +41,8 @@ enum efx_mcdi_mode {
>         MCDI_MODE_FAIL,
>  };
>
> +#define MCDI_BUF_LEN (8 + MCDI_CTL_SDU_LEN_MAX)
> +
>  /**
>   * struct efx_mcdi_iface - MCDI protocol context
>   * @efx: The associated NIC.
> --
> 2.30.1
>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ