[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <5726243A.9070109@codeaurora.org>
Date: Sun, 1 May 2016 11:43:54 -0400
From: Sinan Kaya <okaya@...eaurora.org>
To: Haggai Abramovsky <hagaya@...lanox.com>,
"David S. Miller" <davem@...emloft.net>,
Doug Ledford <dledford@...hat.com>
Cc: linux-rdma@...r.kernel.org, netdev@...r.kernel.org,
Timur Tabi <timur@...eaurora.org>,
Eli Cohen <eli@...lanox.com>,
Or Gerlitz <ogerlitz@...lanox.com>,
Eran Ben Elisha <eranbe@...lanox.com>,
Yishai Hadas <yishaih@...lanox.com>,
Tal Alon <talal@...lanox.com>,
Saeed Mahameed <saeedm@...lanox.com>
Subject: Re: [PATCH v1 net] net/mlx4: Avoid wrong virtual mappings
On 4/27/2016 3:07 AM, Haggai Abramovsky wrote:
> The dma_alloc_coherent() function returns a virtual address which can
> be used for coherent access to the underlying memory. On some
> architectures, like arm64, undefined behavior results if this memory is
> also accessed via virtual mappings that are not coherent. Because of
> their undefined nature, operations like virt_to_page() return garbage
> when passed virtual addresses obtained from dma_alloc_coherent(). Any
> subsequent mappings via vmap() of the garbage page values are unusable
> and result in bad things like bus errors (synchronous aborts in ARM64
> speak).
>
> The mlx4 driver contains code that does the equivalent of:
> vmap(virt_to_page(dma_alloc_coherent)), this results in an OOPs when the
> device is opened.
>
> Prevent the Ethernet driver from running this problematic code by
> forcing it to allocate contiguous memory. As for the Infiniband driver,
> we first try to allocate contiguous memory, but in case of failure we
> fall back to working with fragmented memory.
>
> Signed-off-by: Haggai Abramovsky <hagaya@...lanox.com>
> Signed-off-by: Yishai Hadas <yishaih@...lanox.com>
> Reported-by: David Daney <david.daney@...ium.com>
> Tested-by: Sinan Kaya <okaya@...eaurora.org>
> ---
> Changes from v0:
> Fix indentation error raised by LeonR
>
> drivers/infiniband/hw/mlx4/qp.c | 26 +++++--
> drivers/net/ethernet/mellanox/mlx4/alloc.c | 93 ++++++++++-------------
> drivers/net/ethernet/mellanox/mlx4/en_cq.c | 9 +--
> drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 2 +-
> drivers/net/ethernet/mellanox/mlx4/en_resources.c | 31 --------
> drivers/net/ethernet/mellanox/mlx4/en_rx.c | 11 +--
> drivers/net/ethernet/mellanox/mlx4/en_tx.c | 14 +---
> drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 2 -
> include/linux/mlx4/device.h | 4 +-
> 9 files changed, 67 insertions(+), 125 deletions(-)
>
> diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
> index fd97534..842a6da 100644
> --- a/drivers/infiniband/hw/mlx4/qp.c
> +++ b/drivers/infiniband/hw/mlx4/qp.c
> @@ -419,7 +419,8 @@ static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
> }
>
> static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
> - enum mlx4_ib_qp_type type, struct mlx4_ib_qp *qp)
> + enum mlx4_ib_qp_type type, struct mlx4_ib_qp *qp,
> + int shrink_wqe)
> {
> int s;
>
> @@ -477,7 +478,7 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
> * We set WQE size to at least 64 bytes, this way stamping
> * invalidates each WQE.
> */
> - if (dev->dev->caps.fw_ver >= MLX4_FW_VER_WQE_CTRL_NEC &&
> + if (shrink_wqe && dev->dev->caps.fw_ver >= MLX4_FW_VER_WQE_CTRL_NEC &&
> qp->sq_signal_bits && BITS_PER_LONG == 64 &&
> type != MLX4_IB_QPT_SMI && type != MLX4_IB_QPT_GSI &&
> !(type & (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_PROXY_SMI |
> @@ -642,6 +643,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
> {
> int qpn;
> int err;
> + struct ib_qp_cap backup_cap;
> struct mlx4_ib_sqp *sqp;
> struct mlx4_ib_qp *qp;
> enum mlx4_ib_qp_type qp_type = (enum mlx4_ib_qp_type) init_attr->qp_type;
> @@ -775,7 +777,8 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
> goto err;
> }
>
> - err = set_kernel_sq_size(dev, &init_attr->cap, qp_type, qp);
> + memcpy(&backup_cap, &init_attr->cap, sizeof(backup_cap));
> + err = set_kernel_sq_size(dev, &init_attr->cap, qp_type, qp, 1);
> if (err)
> goto err;
>
> @@ -787,9 +790,20 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
> *qp->db.db = 0;
> }
>
> - if (mlx4_buf_alloc(dev->dev, qp->buf_size, PAGE_SIZE * 2, &qp->buf, gfp)) {
> - err = -ENOMEM;
> - goto err_db;
> + if (mlx4_buf_alloc(dev->dev, qp->buf_size, qp->buf_size,
> + &qp->buf, gfp)) {
> + memcpy(&init_attr->cap, &backup_cap,
> + sizeof(backup_cap));
> + err = set_kernel_sq_size(dev, &init_attr->cap, qp_type,
> + qp, 0);
> + if (err)
> + goto err_db;
> +
> + if (mlx4_buf_alloc(dev->dev, qp->buf_size,
> + PAGE_SIZE * 2, &qp->buf, gfp)) {
> + err = -ENOMEM;
> + goto err_db;
> + }
> }
>
> err = mlx4_mtt_init(dev->dev, qp->buf.npages, qp->buf.page_shift,
> diff --git a/drivers/net/ethernet/mellanox/mlx4/alloc.c b/drivers/net/ethernet/mellanox/mlx4/alloc.c
> index 0c51c69..249a458 100644
> --- a/drivers/net/ethernet/mellanox/mlx4/alloc.c
> +++ b/drivers/net/ethernet/mellanox/mlx4/alloc.c
> @@ -576,41 +576,48 @@ out:
>
> return res;
> }
> -/*
> - * Handling for queue buffers -- we allocate a bunch of memory and
> - * register it in a memory region at HCA virtual address 0. If the
> - * requested size is > max_direct, we split the allocation into
> - * multiple pages, so we don't require too much contiguous memory.
> - */
>
> -int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
> - struct mlx4_buf *buf, gfp_t gfp)
> +static int mlx4_buf_direct_alloc(struct mlx4_dev *dev, int size,
> + struct mlx4_buf *buf, gfp_t gfp)
> {
> dma_addr_t t;
>
> - if (size <= max_direct) {
> - buf->nbufs = 1;
> - buf->npages = 1;
> - buf->page_shift = get_order(size) + PAGE_SHIFT;
> - buf->direct.buf = dma_alloc_coherent(&dev->persist->pdev->dev,
> - size, &t, gfp);
> - if (!buf->direct.buf)
> - return -ENOMEM;
> + buf->nbufs = 1;
> + buf->npages = 1;
> + buf->page_shift = get_order(size) + PAGE_SHIFT;
> + buf->direct.buf =
> + dma_zalloc_coherent(&dev->persist->pdev->dev,
> + size, &t, gfp);
> + if (!buf->direct.buf)
> + return -ENOMEM;
>
> - buf->direct.map = t;
> + buf->direct.map = t;
>
> - while (t & ((1 << buf->page_shift) - 1)) {
> - --buf->page_shift;
> - buf->npages *= 2;
> - }
> + while (t & ((1 << buf->page_shift) - 1)) {
> + --buf->page_shift;
> + buf->npages *= 2;
> + }
>
> - memset(buf->direct.buf, 0, size);
> + return 0;
> +}
> +
> +/* Handling for queue buffers -- we allocate a bunch of memory and
> + * register it in a memory region at HCA virtual address 0. If the
> + * requested size is > max_direct, we split the allocation into
> + * multiple pages, so we don't require too much contiguous memory.
> + */
> +int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
> + struct mlx4_buf *buf, gfp_t gfp)
> +{
> + if (size <= max_direct) {
> + return mlx4_buf_direct_alloc(dev, size, buf, gfp);
> } else {
> + dma_addr_t t;
> int i;
>
> - buf->direct.buf = NULL;
> - buf->nbufs = (size + PAGE_SIZE - 1) / PAGE_SIZE;
> - buf->npages = buf->nbufs;
> + buf->direct.buf = NULL;
> + buf->nbufs = (size + PAGE_SIZE - 1) / PAGE_SIZE;
> + buf->npages = buf->nbufs;
> buf->page_shift = PAGE_SHIFT;
> buf->page_list = kcalloc(buf->nbufs, sizeof(*buf->page_list),
> gfp);
> @@ -619,28 +626,12 @@ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
>
> for (i = 0; i < buf->nbufs; ++i) {
> buf->page_list[i].buf =
> - dma_alloc_coherent(&dev->persist->pdev->dev,
> - PAGE_SIZE,
> - &t, gfp);
> + dma_zalloc_coherent(&dev->persist->pdev->dev,
> + PAGE_SIZE, &t, gfp);
> if (!buf->page_list[i].buf)
> goto err_free;
>
> buf->page_list[i].map = t;
> -
> - memset(buf->page_list[i].buf, 0, PAGE_SIZE);
> - }
> -
> - if (BITS_PER_LONG == 64) {
> - struct page **pages;
> - pages = kmalloc(sizeof *pages * buf->nbufs, gfp);
> - if (!pages)
> - goto err_free;
> - for (i = 0; i < buf->nbufs; ++i)
> - pages[i] = virt_to_page(buf->page_list[i].buf);
> - buf->direct.buf = vmap(pages, buf->nbufs, VM_MAP, PAGE_KERNEL);
> - kfree(pages);
> - if (!buf->direct.buf)
> - goto err_free;
> }
> }
>
> @@ -655,15 +646,11 @@ EXPORT_SYMBOL_GPL(mlx4_buf_alloc);
>
> void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf)
> {
> - int i;
> -
> - if (buf->nbufs == 1)
> + if (buf->nbufs == 1) {
> dma_free_coherent(&dev->persist->pdev->dev, size,
> - buf->direct.buf,
> - buf->direct.map);
> - else {
> - if (BITS_PER_LONG == 64)
> - vunmap(buf->direct.buf);
> + buf->direct.buf, buf->direct.map);
> + } else {
> + int i;
>
> for (i = 0; i < buf->nbufs; ++i)
> if (buf->page_list[i].buf)
> @@ -789,7 +776,7 @@ void mlx4_db_free(struct mlx4_dev *dev, struct mlx4_db *db)
> EXPORT_SYMBOL_GPL(mlx4_db_free);
>
> int mlx4_alloc_hwq_res(struct mlx4_dev *dev, struct mlx4_hwq_resources *wqres,
> - int size, int max_direct)
> + int size)
> {
> int err;
>
> @@ -799,7 +786,7 @@ int mlx4_alloc_hwq_res(struct mlx4_dev *dev, struct mlx4_hwq_resources *wqres,
>
> *wqres->db.db = 0;
>
> - err = mlx4_buf_alloc(dev, size, max_direct, &wqres->buf, GFP_KERNEL);
> + err = mlx4_buf_direct_alloc(dev, size, &wqres->buf, GFP_KERNEL);
> if (err)
> goto err_db;
>
> diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c
> index af975a2..132cea6 100644
> --- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c
> +++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c
> @@ -73,22 +73,16 @@ int mlx4_en_create_cq(struct mlx4_en_priv *priv,
> */
> set_dev_node(&mdev->dev->persist->pdev->dev, node);
> err = mlx4_alloc_hwq_res(mdev->dev, &cq->wqres,
> - cq->buf_size, 2 * PAGE_SIZE);
> + cq->buf_size);
> set_dev_node(&mdev->dev->persist->pdev->dev, mdev->dev->numa_node);
> if (err)
> goto err_cq;
>
> - err = mlx4_en_map_buffer(&cq->wqres.buf);
> - if (err)
> - goto err_res;
> -
> cq->buf = (struct mlx4_cqe *)cq->wqres.buf.direct.buf;
> *pcq = cq;
>
> return 0;
>
> -err_res:
> - mlx4_free_hwq_res(mdev->dev, &cq->wqres, cq->buf_size);
> err_cq:
> kfree(cq);
> *pcq = NULL;
> @@ -177,7 +171,6 @@ void mlx4_en_destroy_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq **pcq)
> struct mlx4_en_dev *mdev = priv->mdev;
> struct mlx4_en_cq *cq = *pcq;
>
> - mlx4_en_unmap_buffer(&cq->wqres.buf);
> mlx4_free_hwq_res(mdev->dev, &cq->wqres, cq->buf_size);
> if (mlx4_is_eq_vector_valid(mdev->dev, priv->port, cq->vector) &&
> cq->is_tx == RX)
> diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
> index b4b258c..5b19178 100644
> --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
> +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
> @@ -2907,7 +2907,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
>
> /* Allocate page for receive rings */
> err = mlx4_alloc_hwq_res(mdev->dev, &priv->res,
> - MLX4_EN_PAGE_SIZE, MLX4_EN_PAGE_SIZE);
> + MLX4_EN_PAGE_SIZE);
> if (err) {
> en_err(priv, "Failed to allocate page for rx qps\n");
> goto out;
> diff --git a/drivers/net/ethernet/mellanox/mlx4/en_resources.c b/drivers/net/ethernet/mellanox/mlx4/en_resources.c
> index 02e925d..a6b0db0 100644
> --- a/drivers/net/ethernet/mellanox/mlx4/en_resources.c
> +++ b/drivers/net/ethernet/mellanox/mlx4/en_resources.c
> @@ -107,37 +107,6 @@ int mlx4_en_change_mcast_lb(struct mlx4_en_priv *priv, struct mlx4_qp *qp,
> return ret;
> }
>
> -int mlx4_en_map_buffer(struct mlx4_buf *buf)
> -{
> - struct page **pages;
> - int i;
> -
> - if (BITS_PER_LONG == 64 || buf->nbufs == 1)
> - return 0;
> -
> - pages = kmalloc(sizeof *pages * buf->nbufs, GFP_KERNEL);
> - if (!pages)
> - return -ENOMEM;
> -
> - for (i = 0; i < buf->nbufs; ++i)
> - pages[i] = virt_to_page(buf->page_list[i].buf);
> -
> - buf->direct.buf = vmap(pages, buf->nbufs, VM_MAP, PAGE_KERNEL);
> - kfree(pages);
> - if (!buf->direct.buf)
> - return -ENOMEM;
> -
> - return 0;
> -}
> -
> -void mlx4_en_unmap_buffer(struct mlx4_buf *buf)
> -{
> - if (BITS_PER_LONG == 64 || buf->nbufs == 1)
> - return;
> -
> - vunmap(buf->direct.buf);
> -}
> -
> void mlx4_en_sqp_event(struct mlx4_qp *qp, enum mlx4_event event)
> {
> return;
> diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
> index 7d25bc9..89775bb 100644
> --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
> +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
> @@ -394,17 +394,11 @@ int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
>
> /* Allocate HW buffers on provided NUMA node */
> set_dev_node(&mdev->dev->persist->pdev->dev, node);
> - err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres,
> - ring->buf_size, 2 * PAGE_SIZE);
> + err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres, ring->buf_size);
> set_dev_node(&mdev->dev->persist->pdev->dev, mdev->dev->numa_node);
> if (err)
> goto err_info;
>
> - err = mlx4_en_map_buffer(&ring->wqres.buf);
> - if (err) {
> - en_err(priv, "Failed to map RX buffer\n");
> - goto err_hwq;
> - }
> ring->buf = ring->wqres.buf.direct.buf;
>
> ring->hwtstamp_rx_filter = priv->hwtstamp_config.rx_filter;
> @@ -412,8 +406,6 @@ int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
> *pring = ring;
> return 0;
>
> -err_hwq:
> - mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size);
> err_info:
> vfree(ring->rx_info);
> ring->rx_info = NULL;
> @@ -517,7 +509,6 @@ void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv,
> struct mlx4_en_dev *mdev = priv->mdev;
> struct mlx4_en_rx_ring *ring = *pring;
>
> - mlx4_en_unmap_buffer(&ring->wqres.buf);
> mlx4_free_hwq_res(mdev->dev, &ring->wqres, size * stride + TXBB_SIZE);
> vfree(ring->rx_info);
> ring->rx_info = NULL;
> diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
> index c0d7b72..b9ab646 100644
> --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
> +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
> @@ -93,20 +93,13 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
>
> /* Allocate HW buffers on provided NUMA node */
> set_dev_node(&mdev->dev->persist->pdev->dev, node);
> - err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres, ring->buf_size,
> - 2 * PAGE_SIZE);
> + err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres, ring->buf_size);
> set_dev_node(&mdev->dev->persist->pdev->dev, mdev->dev->numa_node);
> if (err) {
> en_err(priv, "Failed allocating hwq resources\n");
> goto err_bounce;
> }
>
> - err = mlx4_en_map_buffer(&ring->wqres.buf);
> - if (err) {
> - en_err(priv, "Failed to map TX buffer\n");
> - goto err_hwq_res;
> - }
> -
> ring->buf = ring->wqres.buf.direct.buf;
>
> en_dbg(DRV, priv, "Allocated TX ring (addr:%p) - buf:%p size:%d buf_size:%d dma:%llx\n",
> @@ -117,7 +110,7 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
> MLX4_RESERVE_ETH_BF_QP);
> if (err) {
> en_err(priv, "failed reserving qp for TX ring\n");
> - goto err_map;
> + goto err_hwq_res;
> }
>
> err = mlx4_qp_alloc(mdev->dev, ring->qpn, &ring->qp, GFP_KERNEL);
> @@ -154,8 +147,6 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
>
> err_reserve:
> mlx4_qp_release_range(mdev->dev, ring->qpn, 1);
> -err_map:
> - mlx4_en_unmap_buffer(&ring->wqres.buf);
> err_hwq_res:
> mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size);
> err_bounce:
> @@ -182,7 +173,6 @@ void mlx4_en_destroy_tx_ring(struct mlx4_en_priv *priv,
> mlx4_qp_remove(mdev->dev, &ring->qp);
> mlx4_qp_free(mdev->dev, &ring->qp);
> mlx4_qp_release_range(priv->mdev->dev, ring->qpn, 1);
> - mlx4_en_unmap_buffer(&ring->wqres.buf);
> mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size);
> kfree(ring->bounce_buf);
> ring->bounce_buf = NULL;
> diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
> index d12ab6a..a70e2d0 100644
> --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
> +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
> @@ -671,8 +671,6 @@ void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride,
> int is_tx, int rss, int qpn, int cqn, int user_prio,
> struct mlx4_qp_context *context);
> void mlx4_en_sqp_event(struct mlx4_qp *qp, enum mlx4_event event);
> -int mlx4_en_map_buffer(struct mlx4_buf *buf);
> -void mlx4_en_unmap_buffer(struct mlx4_buf *buf);
> int mlx4_en_change_mcast_lb(struct mlx4_en_priv *priv, struct mlx4_qp *qp,
> int loopback);
> void mlx4_en_calc_rx_buf(struct net_device *dev);
> diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
> index 8541a91..72da65f 100644
> --- a/include/linux/mlx4/device.h
> +++ b/include/linux/mlx4/device.h
> @@ -1051,7 +1051,7 @@ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
> void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf);
> static inline void *mlx4_buf_offset(struct mlx4_buf *buf, int offset)
> {
> - if (BITS_PER_LONG == 64 || buf->nbufs == 1)
> + if (buf->nbufs == 1)
> return buf->direct.buf + offset;
> else
> return buf->page_list[offset >> PAGE_SHIFT].buf +
> @@ -1091,7 +1091,7 @@ int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order,
> void mlx4_db_free(struct mlx4_dev *dev, struct mlx4_db *db);
>
> int mlx4_alloc_hwq_res(struct mlx4_dev *dev, struct mlx4_hwq_resources *wqres,
> - int size, int max_direct);
> + int size);
> void mlx4_free_hwq_res(struct mlx4_dev *mdev, struct mlx4_hwq_resources *wqres,
> int size);
>
>
Can we get this queued for 4.7? Mellanox with arm64 has been broken for over 1.5 years.
--
Sinan Kaya
Qualcomm Technologies, Inc. on behalf of Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux Foundation Collaborative Project
Powered by blists - more mailing lists