[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <1520223607-17560-1-git-send-email-srn@prgmr.com>
Date: Sun, 4 Mar 2018 20:20:07 -0800
From: Sarah Newman <srn@...mr.com>
To: Tariq Toukan <tariqt@...lanox.com>,
Yishai Hadas <yishaih@...lanox.com>
Cc: netdev@...r.kernel.org, Sarah Newman <srn@...mr.com>
Subject: [PATCH] net/mlx4_en: fix potential use-after-free with dma_unmap_page
Take an additional reference to a page whenever it is placed
into the rx ring and put the page again after running
dma_unmap_page.
When swiotlb is in use, calling dma_unmap_page means that
the original page mapped with dma_map_page must still be valid,
as swiotlb will copy data from its internal cache back to the
originally requested DMA location.
When GRO is enabled, before this patch all references to the
original frag may be put and the page freed before dma_unmap_page
in mlx4_en_free_frag is called.
It is possible there is a path where the use-after-free occurs
even with GRO disabled, but this has not been observed so far.
The bug can be trivially detected by doing the following:
* Compile the kernel with DEBUG_PAGEALLOC
* Run the kernel as a Xen Dom0
* Leave GRO enabled on the interface
* Run a 10 second or more test with iperf over the interface.
This bug was likely introduced in
commit 4cce66cdd14a ("mlx4_en: map entire pages to increase throughput"),
first part of v3.6.
It was incidentally fixed in
commit 34db548bfb95 ("mlx4: add page recycling in receive path"),
first part of v4.12.
This version applies to the v4.9 series.
Signed-off-by: Sarah Newman <srn@...mr.com>
Tested-by: Sarah Newman <srn@...mr.com>
---
drivers/net/ethernet/mellanox/mlx4/en_rx.c | 39 +++++++++++++++++++++-------
drivers/net/ethernet/mellanox/mlx4/en_tx.c | 3 ++-
drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 1 +
3 files changed, 32 insertions(+), 11 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index bcbb80f..d1fb087 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -80,10 +80,14 @@ static int mlx4_alloc_pages(struct mlx4_en_priv *priv,
page_alloc->page = page;
page_alloc->dma = dma;
page_alloc->page_offset = 0;
+ page_alloc->page_owner = true;
/* Not doing get_page() for each frag is a big win
* on asymetric workloads. Note we can not use atomic_set().
*/
- page_ref_add(page, page_alloc->page_size / frag_info->frag_stride - 1);
+ /* Since the page must be valid until after dma_unmap_page is called,
+ * take an additional reference we would not have otherwise.
+ */
+ page_ref_add(page, page_alloc->page_size / frag_info->frag_stride);
return 0;
}
@@ -105,9 +109,13 @@ static int mlx4_en_alloc_frags(struct mlx4_en_priv *priv,
page_alloc[i].page_offset += frag_info->frag_stride;
if (page_alloc[i].page_offset + frag_info->frag_stride <=
- ring_alloc[i].page_size)
- continue;
-
+ ring_alloc[i].page_size) {
+ WARN_ON(!page_alloc[i].page);
+ WARN_ON(!page_alloc[i].page_owner);
+ if (likely(page_alloc[i].page &&
+ page_alloc[i].page_owner))
+ continue;
+ }
if (unlikely(mlx4_alloc_pages(priv, &page_alloc[i],
frag_info, gfp)))
goto out;
@@ -131,7 +139,7 @@ static int mlx4_en_alloc_frags(struct mlx4_en_priv *priv,
page = page_alloc[i].page;
/* Revert changes done by mlx4_alloc_pages */
page_ref_sub(page, page_alloc[i].page_size /
- priv->frag_info[i].frag_stride - 1);
+ priv->frag_info[i].frag_stride);
put_page(page);
}
}
@@ -146,11 +154,13 @@ static void mlx4_en_free_frag(struct mlx4_en_priv *priv,
u32 next_frag_end = frags[i].page_offset + 2 * frag_info->frag_stride;
- if (next_frag_end > frags[i].page_size)
+ if (next_frag_end > frags[i].page_size) {
dma_unmap_page(priv->ddev, frags[i].dma, frags[i].page_size,
frag_info->dma_dir);
+ put_page(frags[i].page);
+ }
- if (frags[i].page)
+ if (frags[i].page_owner)
put_page(frags[i].page);
}
@@ -184,9 +194,10 @@ static int mlx4_en_init_allocator(struct mlx4_en_priv *priv,
page = page_alloc->page;
/* Revert changes done by mlx4_alloc_pages */
page_ref_sub(page, page_alloc->page_size /
- priv->frag_info[i].frag_stride - 1);
+ priv->frag_info[i].frag_stride);
put_page(page);
page_alloc->page = NULL;
+ page_alloc->page_owner = false;
}
return -ENOMEM;
}
@@ -206,12 +217,14 @@ static void mlx4_en_destroy_allocator(struct mlx4_en_priv *priv,
dma_unmap_page(priv->ddev, page_alloc->dma,
page_alloc->page_size, frag_info->dma_dir);
+ put_page(page_alloc->page);
while (page_alloc->page_offset + frag_info->frag_stride <
page_alloc->page_size) {
put_page(page_alloc->page);
page_alloc->page_offset += frag_info->frag_stride;
}
page_alloc->page = NULL;
+ page_alloc->page_owner = false;
}
}
@@ -251,6 +264,11 @@ static int mlx4_en_prepare_rx_desc(struct mlx4_en_priv *priv,
if (ring->page_cache.index > 0) {
frags[0] = ring->page_cache.buf[--ring->page_cache.index];
rx_desc->data[0].addr = cpu_to_be64(frags[0].dma);
+ WARN_ON(frags[0].page_owner);
+ if (likely(!frags[0].page_owner)) {
+ page_ref_inc(frags[0].page);
+ frags[0].page_owner = true;
+ }
return 0;
}
@@ -569,6 +587,7 @@ void mlx4_en_deactivate_rx_ring(struct mlx4_en_priv *priv,
dma_unmap_page(priv->ddev, frame->dma, frame->page_size,
priv->frag_info[0].dma_dir);
+ WARN_ON(frame->page_owner);
put_page(frame->page);
}
ring->page_cache.index = 0;
@@ -595,7 +614,7 @@ static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv,
frag_info = &priv->frag_info[nr];
if (length <= frag_info->frag_prefix_size)
break;
- if (unlikely(!frags[nr].page))
+ if (unlikely(!frags[nr].page_owner))
goto fail;
dma = be64_to_cpu(rx_desc->data[nr].addr);
@@ -607,7 +626,7 @@ static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv,
skb_frag_size_set(&skb_frags_rx[nr], frag_info->frag_size);
skb_frags_rx[nr].page_offset = frags[nr].page_offset;
skb->truesize += frag_info->frag_stride;
- frags[nr].page = NULL;
+ frags[nr].page_owner = false;
}
/* Adjust size of last fragment to match actual length */
if (nr > 0)
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index e2509bb..25f7f9e 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -356,6 +356,7 @@ u32 mlx4_en_recycle_tx_desc(struct mlx4_en_priv *priv,
.dma = tx_info->map0_dma,
.page_offset = 0,
.page_size = PAGE_SIZE,
+ .page_owner = false,
};
if (!mlx4_en_rx_recycle(ring->recycle_ring, &frame)) {
@@ -1128,7 +1129,7 @@ netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_alloc *frame,
dma = frame->dma;
tx_info->page = frame->page;
- frame->page = NULL;
+ frame->page_owner = false;
tx_info->map0_dma = dma;
tx_info->map0_byte_count = length;
tx_info->nr_txbb = nr_txbb;
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index df0f396..2c9d9a6 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -261,6 +261,7 @@ struct mlx4_en_rx_alloc {
dma_addr_t dma;
u32 page_offset;
u32 page_size;
+ bool page_owner;
};
#define MLX4_EN_CACHE_SIZE (2 * NAPI_POLL_WEIGHT)
--
1.9.1
Powered by blists - more mailing lists