linux-kernel - [PATCH v3 1/3] dmapool: Move pool metadata into non-DMA memory

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20241122211144.4186080-3-bjohannesmeyer@gmail.com>
Date: Fri, 22 Nov 2024 22:11:43 +0100
From: Brian Johannesmeyer <bjohannesmeyer@...il.com>
To: Keith Busch <kbusch@...nel.org>,
	Christoph Hellwig <hch@...radead.org>,
	Andrew Morton <akpm@...ux-foundation.org>,
	linux-mm@...ck.org,
	linux-kernel@...r.kernel.org,
	linux-hardening@...r.kernel.org
Cc: Brian Johannesmeyer <bjohannesmeyer@...il.com>,
	Raphael Isemann <teemperor@...il.com>,
	Cristiano Giuffrida <giuffrida@...vu.nl>,
	Herbert Bos <h.j.bos@...nl>,
	Greg KH <gregkh@...uxfoundation.org>
Subject: [PATCH v3 1/3] dmapool: Move pool metadata into non-DMA memory

If a `struct dma_block` object resides in DMA memory, a malicious
peripheral device can corrupt its metadata --- specifically, its
`next_block` pointer, which links blocks in a DMA pool. By corrupting these
pointers, an attacker can make `dma_pool_alloc()` return
attacker-controlled pointers; a driver that then writes to the returned
buffer corrupts kernel memory.

To prevent this, move the `struct dma_block` metadata into non-DMA memory,
ensuring that devices cannot tamper with the internal pointers of the DMA
pool allocator. Specifically:

- Add a `vaddr` field to `struct dma_block` to point to the actual
  DMA-accessible block.
- Maintain an array of `struct dma_block` objects in `struct dma_page` to
  track the metadata of each block within an allocated page.

This change secures the DMA pool allocator by keeping its metadata in
kernel memory, inaccessible to peripheral devices, thereby preventing
potential attacks that could corrupt kernel memory through DMA operations.

**Performance Impact**

Unfortunately, results from the `DMAPOOL_TEST` benchmark show that this
change slows the allocator down considerably (roughly 3x to 45x, depending
on the block size). Before the patch:
```
dmapool test: size:16   align:16   blocks:8192 time:11860
dmapool test: size:64   align:64   blocks:8192 time:11951
dmapool test: size:256  align:256  blocks:8192 time:12287
dmapool test: size:1024 align:1024 blocks:2048 time:3134
dmapool test: size:4096 align:4096 blocks:1024 time:1686
dmapool test: size:68   align:32   blocks:8192 time:12050
```

After the patch:
```
dmapool test: size:16   align:16   blocks:8192 time:34432
dmapool test: size:64   align:64   blocks:8192 time:62262
dmapool test: size:256  align:256  blocks:8192 time:238137
dmapool test: size:1024 align:1024 blocks:2048 time:61386
dmapool test: size:4096 align:4096 blocks:1024 time:75342
dmapool test: size:68   align:32   blocks:8192 time:88243
```

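While the performance impact is significant, this patch provides protection
against malicious devices tampering with DMA pool metadata. The extra cost
comes in part from `dma_pool_free()`, which now has to walk the pool's page
list (`pool_find_block()`) to map the freed address back to its metadata.
A subsequent patch in this series introduces an optimization to mitigate
the runtime overhead.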

Co-developed-by: Raphael Isemann <teemperor@...il.com>
Signed-off-by: Raphael Isemann <teemperor@...il.com>
Signed-off-by: Brian Johannesmeyer <bjohannesmeyer@...il.com>
---
 mm/dmapool.c | 62 +++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 52 insertions(+), 10 deletions(-)

diff --git a/mm/dmapool.c b/mm/dmapool.c
index f0bfc6c490f4..3790ca4a631d 100644
--- a/mm/dmapool.c
+++ b/mm/dmapool.c
@@ -43,6 +43,7 @@
 struct dma_block {
 	struct dma_block *next_block;
 	dma_addr_t dma;
+	void *vaddr;	/* CPU address of the DMA-accessible block described here */
 };
 
 struct dma_pool {		/* the pool */
@@ -64,6 +65,8 @@ struct dma_page {		/* cacheable header for 'allocation' bytes */
 	struct list_head page_list;
 	void *vaddr;
 	dma_addr_t dma;
+	struct dma_block *blocks;
+	size_t blocks_per_page;
 };
 
 static DEFINE_MUTEX(pools_lock);
@@ -91,14 +94,35 @@ static ssize_t pools_show(struct device *dev, struct device_attribute *attr, cha
 
 static DEVICE_ATTR_RO(pools);
 
+/* Return the metadata of the block at @vaddr, or NULL if there is none. */
+static struct dma_block *pool_find_block(struct dma_pool *pool, void *vaddr)
+{
+	size_t offset, slot, index, per_chunk = pool->boundary / pool->size;
+	struct dma_page *page;
+
+	list_for_each_entry(page, &pool->page_list, page_list) {
+		offset = vaddr - page->vaddr;
+		if (vaddr < page->vaddr || offset >= pool->allocation)
+			continue;
+
+		/* Page init skips the gap before each boundary: index per chunk. */
+		slot = (offset % pool->boundary) / pool->size;
+		index = (offset / pool->boundary) * per_chunk + slot;
+		if (slot >= per_chunk || index >= page->blocks_per_page)
+			return NULL;	/* gap before a boundary, not a block */
+		return &page->blocks[index];
+	}
+	return NULL;
+}
+
 #ifdef DMAPOOL_DEBUG
 static void pool_check_block(struct dma_pool *pool, struct dma_block *block,
 			     gfp_t mem_flags)
 {
-	u8 *data = (void *)block;
+	u8 *data = (void *)block->vaddr;
 	int i;
 
-	for (i = sizeof(struct dma_block); i < pool->size; i++) {
+	for (i = 0; i < pool->size; i++) {
 		if (data[i] == POOL_POISON_FREED)
 			continue;
 		dev_err(pool->dev, "%s %s, %p (corrupted)\n", __func__,
@@ -114,7 +138,7 @@ static void pool_check_block(struct dma_pool *pool, struct dma_block *block,
 	}
 
 	if (!want_init_on_alloc(mem_flags))
-		memset(block, POOL_POISON_ALLOCATED, pool->size);
+		memset(block->vaddr, POOL_POISON_ALLOCATED, pool->size);
 }
 
 static struct dma_page *pool_find_page(struct dma_pool *pool, dma_addr_t dma)
@@ -143,7 +167,7 @@ static bool pool_block_err(struct dma_pool *pool, void *vaddr, dma_addr_t dma)
 	}
 
 	while (block) {
-		if (block != vaddr) {
+		if (block->vaddr != vaddr) {
 			block = block->next_block;
 			continue;
 		}
@@ -238,8 +262,6 @@ struct dma_pool *dma_pool_create(const char *name, struct device *dev,
 
 	if (size == 0 || size > INT_MAX)
 		return NULL;
-	if (size < sizeof(struct dma_block))
-		size = sizeof(struct dma_block);
 
 	size = ALIGN(size, align);
 	allocation = max_t(size_t, size, PAGE_SIZE);
@@ -301,6 +323,7 @@ static void pool_initialise_page(struct dma_pool *pool, struct dma_page *page)
 {
 	unsigned int next_boundary = pool->boundary, offset = 0;
 	struct dma_block *block, *first = NULL, *last = NULL;
+	size_t i = 0;
 
 	pool_init_page(pool, page);
 	while (offset + pool->size <= pool->allocation) {
@@ -310,7 +333,8 @@ static void pool_initialise_page(struct dma_pool *pool, struct dma_page *page)
 			continue;
 		}
 
-		block = page->vaddr + offset;
+		block = &page->blocks[i];
+		block->vaddr = page->vaddr + offset;
 		block->dma = page->dma + offset;
 		block->next_block = NULL;
 
@@ -322,6 +346,7 @@ static void pool_initialise_page(struct dma_pool *pool, struct dma_page *page)
 
 		offset += pool->size;
 		pool->nr_blocks++;
+		i++;
 	}
 
 	last->next_block = pool->next_block;
@@ -339,9 +364,18 @@ static struct dma_page *pool_alloc_page(struct dma_pool *pool, gfp_t mem_flags)
 	if (!page)
 		return NULL;
 
+	page->blocks_per_page = pool->allocation / pool->size;
+	page->blocks = kmalloc_array(page->blocks_per_page,
+				     sizeof(struct dma_block), GFP_KERNEL);
+	if (!page->blocks) {
+		kfree(page);
+		return NULL;
+	}
+
 	page->vaddr = dma_alloc_coherent(pool->dev, pool->allocation,
 					 &page->dma, mem_flags);
 	if (!page->vaddr) {
+		kfree(page->blocks);
 		kfree(page);
 		return NULL;
 	}
@@ -383,6 +417,7 @@ void dma_pool_destroy(struct dma_pool *pool)
 		if (!busy)
 			dma_free_coherent(pool->dev, pool->allocation,
 					  page->vaddr, page->dma);
+		kfree(page->blocks);
 		list_del(&page->page_list);
 		kfree(page);
 	}
@@ -432,9 +467,9 @@ void *dma_pool_alloc(struct dma_pool *pool, gfp_t mem_flags,
 	*handle = block->dma;
 	pool_check_block(pool, block, mem_flags);
 	if (want_init_on_alloc(mem_flags))
-		memset(block, 0, pool->size);
+		memset(block->vaddr, 0, pool->size);
 
-	return block;
+	return block->vaddr;
 }
 EXPORT_SYMBOL(dma_pool_alloc);
 
@@ -449,9 +484,16 @@ EXPORT_SYMBOL(dma_pool_alloc);
  */
 void dma_pool_free(struct dma_pool *pool, void *vaddr, dma_addr_t dma)
 {
-	struct dma_block *block = vaddr;
+	struct dma_block *block;
 	unsigned long flags;
 
+	block = pool_find_block(pool, vaddr);
+	if (!block) {
+		dev_err(pool->dev, "%s %s, invalid vaddr %p\n",
+			__func__, pool->name, vaddr);
+		return;
+	}
+
 	spin_lock_irqsave(&pool->lock, flags);
 	if (!pool_block_err(pool, vaddr, dma)) {
 		pool_block_push(pool, block, dma);
-- 
2.34.1