lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <1679766790-24629-1-git-send-email-mikelley@microsoft.com>
Date:   Sat, 25 Mar 2023 10:53:10 -0700
From:   Michael Kelley <mikelley@...rosoft.com>
To:     hch@....de, m.szyprowski@...sung.com, robin.murphy@....com,
        decui@...rosoft.com, tiala@...rosoft.com, iommu@...ts.linux.dev,
        linux-kernel@...r.kernel.org
Cc:     mikelley@...rosoft.com
Subject: [PATCH v2 1/1] swiotlb: Track and report io_tlb_used high water mark in debugfs

swiotlb currently reports the total number of slabs and the instantaneous
in-use slabs in debugfs. But with increased usage of swiotlb for all I/O
in Confidential Computing (coco) VMs, it has become difficult to know
how much memory to allocate for swiotlb bounce buffers, either via the
automatic algorithm in the kernel or by specifying a value on the
kernel boot line. The current automatic algorithm generously allocates
swiotlb bounce buffer memory, and may be wasting significant memory in
many use cases.

To support better understanding of swiotlb usage, add tracking of the
high water mark usage of swiotlb bounce buffer memory. Report the
high water mark in debugfs along with the other swiotlb metrics.  Allow
the high water mark to be reset to zero at runtime by writing zero to it.

Since a global in-use slab count is added alongside the existing
per-area in-use count, the mem_used() function that sums across all
areas is no longer needed.  Remove it and replace with the global
in-use count.

Signed-off-by: Michael Kelley <mikelley@...rosoft.com>

Changes in v2:
* Only reset the high water mark to zero when the specified new value
  is zero, to prevent confusion about the ability to reset to some
  other value [Dexuan Cui]

---
 kernel/dma/swiotlb.c | 49 +++++++++++++++++++++++++++++++++++++------------
 1 file changed, 37 insertions(+), 12 deletions(-)

diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index f9f0279..3e50639 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -76,6 +76,9 @@ struct io_tlb_slot {
 static unsigned long default_nslabs = IO_TLB_DEFAULT_SIZE >> IO_TLB_SHIFT;
 static unsigned long default_nareas;
 
+static atomic_long_t total_used = ATOMIC_LONG_INIT(0);
+static atomic_long_t used_hiwater = ATOMIC_LONG_INIT(0);
+
 /**
  * struct io_tlb_area - IO TLB memory area descriptor
  *
@@ -587,6 +590,7 @@ static int swiotlb_do_find_slots(struct device *dev, int area_index,
 	unsigned long flags;
 	unsigned int slot_base;
 	unsigned int slot_index;
+	unsigned long old_hiwater, new_used;
 
 	BUG_ON(!nslots);
 	BUG_ON(area_index >= mem->nareas);
@@ -659,6 +663,14 @@ static int swiotlb_do_find_slots(struct device *dev, int area_index,
 	area->index = wrap_area_index(mem, index + nslots);
 	area->used += nslots;
 	spin_unlock_irqrestore(&area->lock, flags);
+
+	new_used = atomic_long_add_return(nslots, &total_used);
+	old_hiwater = atomic_long_read(&used_hiwater);
+	do {
+		if (new_used <= old_hiwater)
+			break;
+	} while (!atomic_long_try_cmpxchg(&used_hiwater, &old_hiwater, new_used));
+
 	return slot_index;
 }
 
@@ -681,16 +693,6 @@ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
 	return -1;
 }
 
-static unsigned long mem_used(struct io_tlb_mem *mem)
-{
-	int i;
-	unsigned long used = 0;
-
-	for (i = 0; i < mem->nareas; i++)
-		used += mem->areas[i].used;
-	return used;
-}
-
 phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
 		size_t mapping_size, size_t alloc_size,
 		unsigned int alloc_align_mask, enum dma_data_direction dir,
@@ -723,7 +725,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
 		if (!(attrs & DMA_ATTR_NO_WARN))
 			dev_warn_ratelimited(dev,
 	"swiotlb buffer is full (sz: %zd bytes), total %lu (slots), used %lu (slots)\n",
-				 alloc_size, mem->nslabs, mem_used(mem));
+				 alloc_size, mem->nslabs, atomic_long_read(&total_used));
 		return (phys_addr_t)DMA_MAPPING_ERROR;
 	}
 
@@ -791,6 +793,8 @@ static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr)
 		mem->slots[i].list = ++count;
 	area->used -= nslots;
 	spin_unlock_irqrestore(&area->lock, flags);
+
+	atomic_long_sub(nslots, &total_used);
 }
 
 /*
@@ -887,10 +891,29 @@ bool is_swiotlb_active(struct device *dev)
 
 static int io_tlb_used_get(void *data, u64 *val)
 {
-	*val = mem_used(&io_tlb_default_mem);
+	*val = (u64)atomic_long_read(&total_used);
 	return 0;
 }
+
+static int io_tlb_hiwater_get(void *data, u64 *val)
+{
+	*val = (u64)atomic_long_read(&used_hiwater);
+	return 0;
+}
+
+static int io_tlb_hiwater_set(void *data, u64 val)
+{
+	/* Only allow setting to zero */
+	if (val != 0)
+		return -EINVAL;
+
+	atomic_long_set(&used_hiwater, val);
+	return 0;
+}
+
 DEFINE_DEBUGFS_ATTRIBUTE(fops_io_tlb_used, io_tlb_used_get, NULL, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(fops_io_tlb_hiwater, io_tlb_hiwater_get,
+				io_tlb_hiwater_set, "%llu\n");
 
 static void swiotlb_create_debugfs_files(struct io_tlb_mem *mem,
 					 const char *dirname)
@@ -902,6 +925,8 @@ static void swiotlb_create_debugfs_files(struct io_tlb_mem *mem,
 	debugfs_create_ulong("io_tlb_nslabs", 0400, mem->debugfs, &mem->nslabs);
 	debugfs_create_file("io_tlb_used", 0400, mem->debugfs, NULL,
 			&fops_io_tlb_used);
+	debugfs_create_file("io_tlb_used_hiwater", 0600, mem->debugfs, NULL,
+			&fops_io_tlb_hiwater);
 }
 
 static int __init __maybe_unused swiotlb_create_default_debugfs(void)
-- 
1.8.3.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ