Date:   Mon, 12 Dec 2022 13:51:38 -0800
From:   Georgi Djakov <quic_c_gdjako@...cinc.com>
To:     <robh+dt@...nel.org>, <krzysztof.kozlowski+dt@...aro.org>,
        <devicetree@...r.kernel.org>
CC:     <joro@...tes.org>, <will@...nel.org>, <robin.murphy@....com>,
        <iommu@...ts.linux.dev>, <linux-kernel@...r.kernel.org>,
        <djakov@...nel.org>
Subject: [RFC v2 2/2] iommu/iova: Add support for best-fit algorithm

In some multimedia use-cases that mix allocations of buffers smaller
than PAGE_SIZE with ~400MB buffers, the IOVA space becomes fragmented
and allocation failures are observed.

In these scenarios, there are hundreds of randomly distributed
non-power-of-two allocations, which in some cases leave behind holes of
up to 100MB in the IOVA space.

To reduce memory fragmentation in these use-cases, introduce a
"best-fit" allocation algorithm, which individual devices can enable
with a device-tree property. If the property is not present, the
default "first-fit" algorithm is used.

Example usage:

	device {
		compatible = "example-device";
		iommus = <&smmu 0x21 0>;
		iommu-best-fit;
	};

Signed-off-by: Georgi Djakov <quic_c_gdjako@...cinc.com>
---

RFC v1: https://lore.kernel.org/r/1581721602-17010-1-git-send-email-isaacm@codeaurora.org/
- Use a DT property instead of an API to enable the best-fit algorithm for the device
- Rename __alloc_and_insert_iova_range() to __alloc_and_insert_iova_first_fit()
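
Not part of the patch, just an illustration: a minimal user-space sketch
of the policy difference between first-fit and best-fit when picking a
free gap. The kernel code operates on the IOVA rbtree instead, and all
names below are made up.

#include <stdio.h>
#include <stddef.h>

struct gap {
	unsigned long start;
	unsigned long len;
};

/* First-fit: take the first gap that is large enough. */
static long pick_first_fit(const struct gap *gaps, size_t n, unsigned long size)
{
	for (size_t i = 0; i < n; i++)
		if (gaps[i].len >= size)
			return (long)i;
	return -1;
}

/* Best-fit: take the smallest gap that is still large enough. */
static long pick_best_fit(const struct gap *gaps, size_t n, unsigned long size)
{
	long best = -1;

	for (size_t i = 0; i < n; i++) {
		if (gaps[i].len < size)
			continue;
		if (best < 0 || gaps[i].len < gaps[best].len)
			best = (long)i;
	}
	return best;
}

int main(void)
{
	/* A fragmented space: one large hole and a couple of small ones. */
	const struct gap gaps[] = {
		{ 0x001000, 0x100000 },	/* 1MB hole */
		{ 0x200000, 0x002000 },	/* 8KB hole */
		{ 0x300000, 0x004000 },	/* 16KB hole */
	};
	unsigned long size = 0x2000;	/* 8KB request */

	printf("first-fit picks gap %ld, best-fit picks gap %ld\n",
	       pick_first_fit(gaps, 3, size),
	       pick_best_fit(gaps, 3, size));
	return 0;
}

First-fit carves the 8KB request out of the 1MB hole and splits it,
while best-fit consumes the 8KB hole exactly and keeps the large hole
available for big buffers, which is the effect this patch is after.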


 drivers/iommu/dma-iommu.c |  5 +++
 drivers/iommu/iova.c      | 79 ++++++++++++++++++++++++++++++++++++---
 include/linux/iova.h      |  1 +
 3 files changed, 80 insertions(+), 5 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index f798c44e0903..1be25e54d050 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -582,6 +582,11 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
 	if (ret)
 		goto done_unlock;
 
+	if (dev && dev->of_node) {
+		if (of_property_read_bool(dev->of_node, "iommu-best-fit"))
+			iovad->best_fit = true;
+	}
+
 	/* If the FQ fails we can simply fall back to strict mode */
 	if (domain->type == IOMMU_DOMAIN_DMA_FQ && iommu_dma_init_fq(domain))
 		domain->type = IOMMU_DOMAIN_DMA;
diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index a44ad92fc5eb..34ad05a4095f 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -64,6 +64,7 @@ init_iova_domain(struct iova_domain *iovad, unsigned long granule,
 	iovad->cached_node = &iovad->anchor.node;
 	iovad->cached32_node = &iovad->anchor.node;
 	iovad->granule = granule;
+	iovad->best_fit = false;
 	iovad->start_pfn = start_pfn;
 	iovad->dma_32bit_pfn = 1UL << (32 - iova_shift(iovad));
 	iovad->max32_alloc_size = iovad->dma_32bit_pfn;
@@ -175,9 +176,10 @@ iova_insert_rbtree(struct rb_root *root, struct iova *iova,
 	rb_insert_color(&iova->node, root);
 }
 
-static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
-		unsigned long size, unsigned long limit_pfn,
-			struct iova *new, bool size_aligned)
+static int __alloc_and_insert_iova_first_fit(struct iova_domain *iovad,
+					     unsigned long size,
+					     unsigned long limit_pfn,
+					     struct iova *new, bool size_aligned)
 {
 	struct rb_node *curr, *prev;
 	struct iova *curr_iova;
@@ -236,6 +238,68 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
 	return -ENOMEM;
 }
 
+static int __alloc_and_insert_iova_best_fit(struct iova_domain *iovad,
+					    unsigned long size,
+					    unsigned long limit_pfn,
+					    struct iova *new, bool size_aligned)
+{
+	struct rb_node *curr, *prev;
+	struct iova *curr_iova, *prev_iova;
+	unsigned long flags;
+	unsigned long align_mask = ~0UL;
+	struct rb_node *candidate_rb_parent;
+	unsigned long new_pfn, candidate_pfn = ~0UL;
+	unsigned long gap, candidate_gap = ~0UL;
+
+	if (size_aligned)
+		align_mask <<= fls_long(size - 1);
+
+	/* Walk the tree backwards */
+	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
+	curr = &iovad->anchor.node;
+	prev = rb_prev(curr);
+	for (; prev; curr = prev, prev = rb_prev(curr)) {
+		curr_iova = rb_entry(curr, struct iova, node);
+		prev_iova = rb_entry(prev, struct iova, node);
+
+		limit_pfn = min(limit_pfn, curr_iova->pfn_lo);
+		new_pfn = (limit_pfn - size) & align_mask;
+		gap = curr_iova->pfn_lo - prev_iova->pfn_hi - 1;
+		if (limit_pfn >= size && new_pfn > prev_iova->pfn_hi && gap < candidate_gap) {
+			candidate_gap = gap;
+			candidate_pfn = new_pfn;
+			candidate_rb_parent = curr;
+			if (gap == size)
+				goto insert;
+		}
+	}
+
+	curr_iova = rb_entry(curr, struct iova, node);
+	limit_pfn = min(limit_pfn, curr_iova->pfn_lo);
+	new_pfn = (limit_pfn - size) & align_mask;
+	gap = curr_iova->pfn_lo - iovad->start_pfn;
+	if (limit_pfn >= size && new_pfn >= iovad->start_pfn && gap < candidate_gap) {
+		candidate_gap = gap;
+		candidate_pfn = new_pfn;
+		candidate_rb_parent = curr;
+	}
+
+insert:
+	if (candidate_pfn == ~0UL) {
+		spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
+		return -ENOMEM;
+	}
+
+	/* pfn_lo will point to size aligned address if size_aligned is set */
+	new->pfn_lo = candidate_pfn;
+	new->pfn_hi = new->pfn_lo + size - 1;
+
+	/* candidate_rb_parent is a valid place to start the insertion. */
+	iova_insert_rbtree(&iovad->rbroot, new, candidate_rb_parent);
+	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
+	return 0;
+}
+
 static struct kmem_cache *iova_cache;
 static unsigned int iova_cache_users;
 static DEFINE_MUTEX(iova_cache_mutex);
@@ -322,8 +386,13 @@ alloc_iova(struct iova_domain *iovad, unsigned long size,
 	if (!new_iova)
 		return NULL;
 
-	ret = __alloc_and_insert_iova_range(iovad, size, limit_pfn + 1,
-			new_iova, size_aligned);
+	if (iovad->best_fit) {
+		ret = __alloc_and_insert_iova_best_fit(iovad, size, limit_pfn + 1,
+						       new_iova, size_aligned);
+	} else {
+		ret = __alloc_and_insert_iova_first_fit(iovad, size, limit_pfn + 1,
+							new_iova, size_aligned);
+	}
 
 	if (ret) {
 		free_iova_mem(new_iova);
diff --git a/include/linux/iova.h b/include/linux/iova.h
index 83c00fac2acb..c0c77f676e17 100644
--- a/include/linux/iova.h
+++ b/include/linux/iova.h
@@ -38,6 +38,7 @@ struct iova_domain {
 
 	struct iova_rcache	*rcaches;
 	struct hlist_node	cpuhp_dead;
+	bool best_fit;			/* best-fit algorithm is enabled */
 };
 
 static inline unsigned long iova_size(struct iova *iova)

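Not part of the patch: a rough sketch of the consumer side, assuming the
example device from the commit message sits behind the SMMU and carries
the property. No driver changes are needed - once iommu_dma_init_domain()
has seen the property, the IOVAs backing the device's DMA API mappings
come from the best-fit allocator. The driver below is made up;
dma_alloc_coherent() is the regular DMA API.

#include <linux/dma-mapping.h>
#include <linux/mod_devicetable.h>
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/sizes.h>

static int example_probe(struct platform_device *pdev)
{
	dma_addr_t iova;
	void *cpu_addr;

	/* The IOVA for this mapping comes from the best-fit allocator. */
	cpu_addr = dma_alloc_coherent(&pdev->dev, SZ_4K, &iova, GFP_KERNEL);
	if (!cpu_addr)
		return -ENOMEM;

	dev_info(&pdev->dev, "buffer mapped at IOVA %pad\n", &iova);
	dma_free_coherent(&pdev->dev, SZ_4K, cpu_addr, iova);
	return 0;
}

static const struct of_device_id example_of_match[] = {
	{ .compatible = "example-device" },
	{ }
};
MODULE_DEVICE_TABLE(of, example_of_match);

static struct platform_driver example_driver = {
	.probe = example_probe,
	.driver = {
		.name = "example-device",
		.of_match_table = example_of_match,
	},
};
module_platform_driver(example_driver);
MODULE_LICENSE("GPL");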