lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <20240222151618.GA13330@nvidia.com>
Date: Thu, 22 Feb 2024 11:16:18 -0400
From: Jason Gunthorpe <jgg@...dia.com>
To: Yi Liu <yi.l.liu@...el.com>
Cc: joro@...tes.org, kevin.tian@...el.com, baolu.lu@...ux.intel.com,
	alex.williamson@...hat.com, robin.murphy@....com,
	eric.auger@...hat.com, nicolinc@...dia.com, kvm@...r.kernel.org,
	chao.p.peng@...ux.intel.com, yi.y.sun@...ux.intel.com,
	iommu@...ts.linux.dev, linux-kernel@...r.kernel.org,
	linux-kselftest@...r.kernel.org, zhenzhong.duan@...el.com,
	joao.m.martins@...cle.com
Subject: Re: [PATCH rc 3/8] iommu/vt-d: Add missing iotlb flush for parent
 domain

On Thu, Feb 22, 2024 at 04:34:10PM +0800, Yi Liu wrote:
> > It doesn't mean that the S2 is globally shared across all the nesting
> > translations (like ARM does), and you still have to iterate over every
> > nesting DID.
> > 
> > In light of that this design seems to have gone a bit off..
> > 
> > A domain should have a list of places that need invalidation,
> > specifically a list of DIDs and ATCs that need an invalidation to be
> > issued.
> > 
> > Instead we now somehow have 4 different lists in the domain the
> > invalidation code iterates over?
> > 
> > So I would think this:
> > 
> > struct dmar_domain {
> > 	struct xarray iommu_array;	/* Attached IOMMU array */
> > 	struct list_head devices;	/* all devices' list */
> > 	struct list_head dev_pasids;	/* all attached pasids */
> > 	struct list_head s1_domains;
> > 
> > Would make sense to be collapsed into one logical list of attached
> > devices:
> > 
> > struct intel_iommu_domain_attachment {
> >     unsigned int did;
> >     ioasid_t pasid;
> >     struct device_domain_info *info;
> >     list_head item;
> > };
> > 
> > When you attach a S1/S2 nest you allocate two of the above structs and
> > one is linked on the S1 and one is linked on the S2..
> 
> yes, this shall work. ATC flushing should be fine. But there can be a
> drawback when flushing DIDs. VT-d side, if there are multiple devices
> behind the same iommu unit, just need one DID flushing is enough. But
> after the above change, the number of DID flushing would increase per
> the number of devices. Although no functional issue, but it submits
> duplicated invalidation.

At least the three server drivers all have this same problem, I would
like to seem some core code to help solve it, since it is tricky and
the RCU idea is good..

Collapsing invalidations can be done by sorting, I think this was
Robin's suggestion.  But we could also easially maintain multiple list
threads hidden inside the API, or allocate a multi-level list.

Something very approximately like this:

Jason

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index d14413916f93a0..7b2de139e7c437 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -38,6 +38,8 @@
 
 #include "iommu-sva.h"
 
+#include <linux/iommu-alist.h>
+
 static struct kset *iommu_group_kset;
 static DEFINE_IDA(iommu_group_ida);
 static DEFINE_IDA(iommu_global_pasid_ida);
diff --git a/include/linux/iommu-alist.h b/include/linux/iommu-alist.h
new file mode 100644
index 00000000000000..7a9243f8b2b5f8
--- /dev/null
+++ b/include/linux/iommu-alist.h
@@ -0,0 +1,126 @@
+#include <linux/list.h>
+#include <linux/rculist.h>
+#include <linux/iommu.h>
+
+/*
+ * Place in an iommu_domain to keep track of what devices have been attached to
+ * it.
+ */
+struct iommu_attach_list {
+	spinlock_t lock;
+	struct list_head all;
+};
+
+/*
+ * Allocate for every time a device is attached to a domain
+ */
+struct iommu_attach_elm {
+	/*
+	 * Note that this pointer is accessed under RCU, the driver has to be
+	 * careful to rcu free it.
+	 */
+	struct iommu_device *iommu_device;
+	ioasid_t pasid;
+	u8 using_atc;
+	struct list_head all_item;
+
+	/* May not be accessed under RCU! */
+	struct device *device;
+};
+
+void iommu_attach_init(struct iommu_attach_list *alist)
+{
+	spin_lock_init(&alist->lock);
+}
+
+int iommu_attach_add(struct iommu_attach_list *alist,
+		     struct iommu_attach_elm *elm)
+{
+	struct list_head *insert_after;
+
+	spin_lock(&alist->lock);
+	insert_after = list_find_sorted_insertion(alist, elm, cmp);
+	list_add_rcu(&elm->all_item, insert_after);
+	spin_unlock(&alist->lock);
+}
+
+void iommu_attach_del_free(struct iommu_attach_list *alist, struct iommu_attach_elm *elm)
+{
+	spin_lock(&alist->lock);
+	list_del_rcu(&elm->all_item);
+	spin_unlock(&alist->lock);
+	/* assumes 0 offset */
+	kfree_rcu_mightsleep(elm);
+}
+
+static struct iommu_attach_elm *
+__iommu_attach_next_iommu(struct iommu_attach_list *alist,
+			  struct iommu_attach_elm *pos,
+			  struct iommu_attach_elm *last)
+{
+	struct iommu_attach_elm *next;
+
+	do {
+		next = list_next_or_null_rcu(&alist->all, &pos->all_item,
+					     struct iommu_attach_elm, all_item);
+		if (!next)
+			return NULL;
+		if (!last)
+			return next;
+	} while (pos->iommu_device == next->iommu_device);
+	return next;
+}
+
+/* assumes 0 offset */
+#define iommu_attach_next_iommu(alist, pos, last, member)             \
+	container_of(__iommu_attach_next_iommu(alist, &(pos)->member, \
+					       &(last)->member),      \
+		     typeof(*pos), member)
+
+#define iommu_attach_for_each_iommu(pos, last, alist, member)              \
+	for (({                                                            \
+		     RCU_LOCKDEP_WARN(                                     \
+			     !rcu_read_lock_any_held(),                    \
+			     "RCU-list traversed in non-reader section!"); \
+	     }),                                                           \
+	     pos = list_first_or_null_rcu(&(alist)->all, typeof(*pos),     \
+					  member.all_item),                \
+	     last = NULL;                                                  \
+	     pos; pos = iommu_attach_next_iommu(alist, pos, last, member))
+
+/* Example */
+struct driver_domain {
+	struct iommu_domain domain;
+	struct iommu_attach_list alist;
+};
+
+struct driver_attach_elm {
+	struct iommu_attach_elm aelm;
+	unsigned int cache_tag;
+};
+
+static void example(struct driver_domain *domain)
+{
+	struct driver_attach_elm *elm;
+	struct driver_attach_elm *pos, *last;
+	bool need_atc;
+
+	iommu_attach_init(&domain->alist);
+
+	elm = kzalloc(sizeof(*elm), GFP_KERNEL);
+	iommu_attach_add(&domain->alist, &elm->aelm);
+
+	rcu_read_lock();
+	iommu_attach_for_each_iommu(pos, last, &domain->alist, aelm) {
+		invalidate_iommu(elm);
+		need_atc |= elm->aelm.using_atc;
+	}
+	if (need_atc) {
+		list_for_each_entry_rcu(pos, &domain->alist.all,
+					aelm.all_item) {
+			if (pos->aelm.using_atc)
+				invalidate_atc(elm);
+		}
+	}
+	rcu_read_unlock();
+}

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ