lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAEHaoC2ZQAd+h=7mKPOuMfAMex-41soD8=kx2vR_em+i71oBRw@mail.gmail.com>
Date: Sun, 3 Aug 2025 14:11:45 +0300
From: Constantine Gavrilov <cgavrilov@...inidat.com>
To: linux-kernel@...r.kernel.org, Marek Szyprowski <m.szyprowski@...sung.com>, 
	Robin Murphy <robin.murphy@....com>, iommu@...ts.linux.dev
Subject: [PATCH 4/8] Large DMA alloc/add APIs to query available range

This is the fifth patch from the set of patches that enable large IOMMU
DMA registrations. The entire work is available on the master branch of
the git@...hub.com:cgavrilov/linux.git repo.

Some devices (like NTB or GPU) allow mapping of the system memory to PCIe bars,
making it possible to implement PCIe interconnects when devices are connected
to more than one root complex. After one root complex does the mapping, an
application on another root complex can access the memory using the PCIe bar
of the device.
Since a typical system memory mapping uses offset translation (between the
device bar address and the DMA address), the device driver needs to know which
contiguous DMA address range is available to satisfy the device needs before it
can set up the mapping offset. This patch provides APIs to do this.

This patch was developed before the 6.16 kernel that provides functions
dma_iova_try_alloc() and dma_iova_link() to help with this task. With
dma_iova_try_alloc(), the device driver can reserve a DMA address range for its
future use and use dma_iova_link() later to update IOMMU translations on the
reserved range. However, we do not have APIs that would allow allocations of
smaller regions from the reserved area that would provide functionality
similar to iommu_dma_alloc_iova(). This patch makes it possible to query the
available range, set up the offset, and use standard DMA allocation APIs,
after enforcing the DMA mask constraint on the device.

commit 31b8abf68f5114dc90c1d38bd70e505727383666
Author: Constantine Gavrilov <cgavrilov@...inidat.com>
Date:   Thu Jun 26 23:20:40 2025 +0300

    Add APIs to query available DMA address range.

    This adds two exported functions:
    * iommu_domain_get_lowest_free_address_range()
    * iovad_get_lowest_free_address_range()

    NTB drivers that implement translation by offset can query the
    available range, set the first region offset to the returned value,
    and also set the DMA max address to the returned value + window size.
    Since DMA address allocation is from the top addresses, this allows
    the applications to request a large IOMMU registration that matches
    the NTB window size.

    The prior query to iommu_domain_get_lowest_free_address_range() makes
    sure that a required DMA range is available and not used by other devices.

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 6ba9be4fb64d..e78d7f8a2d61 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -769,6 +769,15 @@ ssize_t iommu_domain_show_busy_regions(struct
iommu_domain *domain, char *buf)
     return iovad_show_busy_regions(iovad, buf);
 }

+/**
+ * iommu_domain_get_lowest_free_address_range - query the lowest free IOVA
+ *	range of a domain that satisfies @query
+ * @domain: IOMMU domain whose IOVA space is examined
+ * @query: size, alignment and address-range constraints
+ * @res: on success, receives the lowest suitable start address
+ *
+ * Thin wrapper that resolves the domain's DMA cookie and forwards to
+ * iovad_get_lowest_free_address_range().
+ *
+ * Return: 0 on success, -EINVAL for malformed constraints, -ENOMEM when
+ * no suitable range exists.
+ */
+int iommu_domain_get_lowest_free_address_range(struct iommu_domain *domain,
+		struct addr_range_query *query, u64 *res)
+{
+	struct iommu_dma_cookie *cookie = domain->iova_cookie;
+	struct iova_domain *iovad = &cookie->iovad;
+
+	return iovad_get_lowest_free_address_range(iovad, query, res);
+}
+/* _GPL to match iovad_show_busy_regions() and the rest of this file. */
+EXPORT_SYMBOL_GPL(iommu_domain_get_lowest_free_address_range);
+
 static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain,
         size_t size, u64 dma_limit, struct device *dev, iova_align_t align)
 {
diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index 96144c58b386..aba58630be12 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -311,6 +311,59 @@ ssize_t iovad_show_busy_regions(struct
iova_domain *iovad, char *buf)
 }
 EXPORT_SYMBOL_GPL(iovad_show_busy_regions);

+/**
+ * iovad_get_lowest_free_address_range - hint for the lowest free IOVA range
+ *	that satisfies @query
+ * @iovad: IOVA domain to search
+ * @query: constraints: @size bytes, start within [@addr_min, @addr_max],
+ *	optional power-of-two @align
+ * @res: on success, receives the lowest suitable start address
+ *
+ * Walks the rbtree of allocated ranges downwards from the anchor node,
+ * examining the gap between each pair of neighbouring allocations and
+ * remembering the lowest gap that can hold the request.
+ *
+ * The result is only a hint: the lock is dropped before returning, so a
+ * concurrent allocation may claim the range before the caller uses it.
+ *
+ * Return: 0 with *res set on success, -EINVAL for malformed constraints,
+ * -ENOMEM when no suitable gap exists.
+ */
+int iovad_get_lowest_free_address_range(struct iova_domain *iovad,
+		struct addr_range_query *query, u64 *res)
+{
+	struct rb_node *curr, *prev;
+	struct iova *curr_iova, *prev_iova;
+	unsigned long flags;
+	unsigned long shift = iova_shift(iovad);
+	int ret = -ENOMEM;
+
+	if (query->align && !is_power_of_2(query->align))
+		return -EINVAL;
+	if (query->addr_min >= query->addr_max)
+		return -EINVAL;
+
+	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
+	curr = &iovad->anchor.node;
+	curr_iova = rb_entry(curr, struct iova, node);
+	while (curr) {
+		prev = rb_prev(curr);
+		curr = prev;
+		if (prev) {
+			u64 free_start;
+			u64 free_end;
+			u64 alloc_end;
+
+			prev_iova = rb_entry(prev, struct iova, node);
+			/*
+			 * Widen to u64 before shifting: pfn_hi/pfn_lo are
+			 * unsigned long, so the shift would truncate the
+			 * address on 32-bit targets.
+			 */
+			free_start = (u64)(prev_iova->pfn_hi + 1) << shift;
+			free_end = (u64)curr_iova->pfn_lo << shift;
+			curr_iova = prev_iova;
+			if (query->align)
+				free_start = ALIGN(free_start, query->align);
+			alloc_end = free_start + query->size;
+
+			/* Gaps below this point can only start lower still. */
+			if (free_start < query->addr_min)
+				break;
+			if (alloc_end > query->addr_max)
+				continue; /* does not match address constraint */
+			if (free_start > alloc_end || free_start >= free_end ||
+			    alloc_end > free_end)
+				continue; /* overflow, or gap too small */
+
+			/* Record and keep walking: a lower gap may still fit. */
+			ret = 0;
+			*res = free_start;
+		}
+	}
+	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
+
+	return ret;
+}
+/* _GPL to match iovad_show_busy_regions() and the rest of this file. */
+EXPORT_SYMBOL_GPL(iovad_get_lowest_free_address_range);
+
 static struct iova *
 private_find_iova(struct iova_domain *iovad, unsigned long pfn)
 {
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 5fe92c00221d..96ac4333f727 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -14,6 +14,7 @@
 #include <linux/err.h>
 #include <linux/of.h>
 #include <linux/iova_bitmap.h>
+#include <linux/iova.h>

 #define IOMMU_READ    (1 << 0)
 #define IOMMU_WRITE    (1 << 1)
@@ -1512,6 +1513,7 @@ static inline void iommu_debugfs_setup(void) {}
 #ifdef CONFIG_IOMMU_DMA
 int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base);
 ssize_t iommu_domain_show_busy_regions(struct iommu_domain *domain, char *buf);
+int iommu_domain_get_lowest_free_address_range(struct iommu_domain
*domain, struct addr_range_query *query, u64 *res);
 #else /* CONFIG_IOMMU_DMA */
 static inline int iommu_get_msi_cookie(struct iommu_domain *domain,
dma_addr_t base)
 {
diff --git a/include/linux/iova.h b/include/linux/iova.h
index c09d224cce2b..30ce5ad499d2 100644
--- a/include/linux/iova.h
+++ b/include/linux/iova.h
@@ -80,6 +80,13 @@ static inline unsigned long iova_pfn(struct
iova_domain *iovad, dma_addr_t iova)
     return iova >> iova_shift(iovad);
 }

+/*
+ * Constraints for iovad_get_lowest_free_address_range() and
+ * iommu_domain_get_lowest_free_address_range().
+ */
+struct addr_range_query {
+    u64 size;     /* number of bytes the free range must hold */
+    u64 addr_min; /* lowest acceptable (aligned) start address */
+    u64 addr_max; /* start + size must not exceed this address */
+    u64 align;    /* start alignment: 0 = none, else a power of two */
+};
+
 #if IS_REACHABLE(CONFIG_IOMMU_IOVA)
 int iova_cache_get(void);
 void iova_cache_put(void);
@@ -97,6 +104,9 @@ void free_iova_fast(struct iova_domain *iovad,
unsigned long pfn,

 ssize_t iovad_show_busy_regions(struct iova_domain *iovad, char *buf);

+#define IOVAD_HAS_FREE_ADDR_RANGE
+int iovad_get_lowest_free_address_range(struct iova_domain *iovad,
struct addr_range_query *query, u64 *res);
+
 unsigned long alloc_iova_fast(struct iova_domain *iovad, unsigned long size,
                   unsigned long limit_pfn, bool flush_rcache,
iova_align_t align);
 struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo,
@@ -129,6 +139,11 @@ ssize_t iovad_show_busy_regions(struct
iova_domain *iovad, char *buf)
     return -ENOTSUPP;
 }

+/*
+ * Stub for !IS_REACHABLE(CONFIG_IOMMU_IOVA). Must be static inline: a
+ * plain definition in a header would be emitted in every translation
+ * unit and fail to link (cf. the alloc_iova() stub below).
+ */
+static inline int iovad_get_lowest_free_address_range(struct iova_domain *iovad,
+		struct addr_range_query *query, u64 *res)
+{
+	return -ENOTSUPP;
+}
+
+
 static inline struct iova *alloc_iova(struct iova_domain *iovad,
                       unsigned long size,
                       unsigned long limit_pfn,


-- 
----------------------------------------
Constantine Gavrilov
System Architect and Platform Engineer
Infinidat
Ha-Menofim 9, Hertzelia
----------------------------------------

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ