lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20260113030052.977366-2-baolu.lu@linux.intel.com>
Date: Tue, 13 Jan 2026 11:00:46 +0800
From: Lu Baolu <baolu.lu@...ux.intel.com>
To: Joerg Roedel <joro@...tes.org>,
	Will Deacon <will@...nel.org>,
	Robin Murphy <robin.murphy@....com>,
	Kevin Tian <kevin.tian@...el.com>,
	Jason Gunthorpe <jgg@...dia.com>
Cc: Dmytro Maluka <dmaluka@...omium.org>,
	Samiullah Khawaja <skhawaja@...gle.com>,
	iommu@...ts.linux.dev,
	linux-kernel@...r.kernel.org,
	Lu Baolu <baolu.lu@...ux.intel.com>
Subject: [PATCH 1/3] iommu/vt-d: Use 128-bit atomic updates for context entries

On Intel IOMMU, device context entries are accessed by hardware in
128-bit chunks. Currently, the driver updates these entries by
programming the 'lo' and 'hi' 64-bit fields individually.

This creates a potential race condition where the IOMMU hardware may fetch
a context entry while the CPU has only completed one of the two 64-bit
writes. This "torn" entry — consisting of half-old and half-new data —
could lead to unpredictable hardware behavior, especially when
transitioning the 'Present' bit or changing translation types.

To ensure the IOMMU hardware always observes a consistent state, use
128-bit atomic updates for context entries. This is achieved by building
context entries on the stack and writing them to the table in a single
operation.

As this relies on arch_cmpxchg128_local(), restrict INTEL_IOMMU
dependencies to X86_64.

Fixes: ba39592764ed2 ("Intel IOMMU: Intel IOMMU driver")
Reported-by: Dmytro Maluka <dmaluka@...omium.org>
Closes: https://lore.kernel.org/all/aTG7gc7I5wExai3S@google.com/
Signed-off-by: Lu Baolu <baolu.lu@...ux.intel.com>
---
 drivers/iommu/intel/Kconfig |  2 +-
 drivers/iommu/intel/iommu.h | 22 ++++++++++++++++++----
 drivers/iommu/intel/iommu.c | 30 +++++++++++++++---------------
 drivers/iommu/intel/pasid.c | 18 +++++++++---------
 4 files changed, 43 insertions(+), 29 deletions(-)

diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig
index 5471f814e073..efda19820f95 100644
--- a/drivers/iommu/intel/Kconfig
+++ b/drivers/iommu/intel/Kconfig
@@ -11,7 +11,7 @@ config DMAR_DEBUG
 
 config INTEL_IOMMU
 	bool "Support for Intel IOMMU using DMA Remapping Devices"
-	depends on PCI_MSI && ACPI && X86
+	depends on PCI_MSI && ACPI && X86_64
 	select IOMMU_API
 	select GENERIC_PT
 	select IOMMU_PT
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index 25c5e22096d4..b8999802f401 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -546,6 +546,16 @@ struct pasid_entry;
 struct pasid_state_entry;
 struct page_req_dsc;
 
+static __always_inline void intel_iommu_atomic128_set(u128 *ptr, u128 val)
+{
+	/*
+	 * Use the cmpxchg16b instruction for 128-bit atomicity. As updates
+	 * are serialized by a spinlock, we use the local (unlocked) variant
+	 * to avoid unnecessary bus locking overhead.
+	 */
+	arch_cmpxchg128_local(ptr, *ptr, val);
+}
+
 /*
  * 0: Present
  * 1-11: Reserved
@@ -569,8 +579,13 @@ struct root_entry {
  * 8-23: domain id
  */
 struct context_entry {
-	u64 lo;
-	u64 hi;
+	union {
+		struct {
+			u64 lo;
+			u64 hi;
+		};
+		u128 val128;
+	};
 };
 
 struct iommu_domain_info {
@@ -946,8 +961,7 @@ static inline int context_domain_id(struct context_entry *c)
 
 static inline void context_clear_entry(struct context_entry *context)
 {
-	context->lo = 0;
-	context->hi = 0;
+	intel_iommu_atomic128_set(&context->val128, 0);
 }
 
 #ifdef CONFIG_INTEL_IOMMU
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 134302fbcd92..d721061ebda2 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -1147,8 +1147,8 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
 			domain_lookup_dev_info(domain, iommu, bus, devfn);
 	u16 did = domain_id_iommu(domain, iommu);
 	int translation = CONTEXT_TT_MULTI_LEVEL;
+	struct context_entry *context, new = {0};
 	struct pt_iommu_vtdss_hw_info pt_info;
-	struct context_entry *context;
 	int ret;
 
 	if (WARN_ON(!intel_domain_is_ss_paging(domain)))
@@ -1170,19 +1170,19 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
 		goto out_unlock;
 
 	copied_context_tear_down(iommu, context, bus, devfn);
-	context_clear_entry(context);
-	context_set_domain_id(context, did);
+	context_set_domain_id(&new, did);
 
 	if (info && info->ats_supported)
 		translation = CONTEXT_TT_DEV_IOTLB;
 	else
 		translation = CONTEXT_TT_MULTI_LEVEL;
 
-	context_set_address_root(context, pt_info.ssptptr);
-	context_set_address_width(context, pt_info.aw);
-	context_set_translation_type(context, translation);
-	context_set_fault_enable(context);
-	context_set_present(context);
+	context_set_address_root(&new, pt_info.ssptptr);
+	context_set_address_width(&new, pt_info.aw);
+	context_set_translation_type(&new, translation);
+	context_set_fault_enable(&new);
+	context_set_present(&new);
+	intel_iommu_atomic128_set(&context->val128, new.val128);
 	if (!ecap_coherent(iommu->ecap))
 		clflush_cache_range(context, sizeof(*context));
 	context_present_cache_flush(iommu, did, bus, devfn);
@@ -3771,8 +3771,8 @@ static int intel_iommu_set_dirty_tracking(struct iommu_domain *domain,
 static int context_setup_pass_through(struct device *dev, u8 bus, u8 devfn)
 {
 	struct device_domain_info *info = dev_iommu_priv_get(dev);
+	struct context_entry *context, new = {0};
 	struct intel_iommu *iommu = info->iommu;
-	struct context_entry *context;
 
 	spin_lock(&iommu->lock);
 	context = iommu_context_addr(iommu, bus, devfn, 1);
@@ -3787,17 +3787,17 @@ static int context_setup_pass_through(struct device *dev, u8 bus, u8 devfn)
 	}
 
 	copied_context_tear_down(iommu, context, bus, devfn);
-	context_clear_entry(context);
-	context_set_domain_id(context, FLPT_DEFAULT_DID);
+	context_set_domain_id(&new, FLPT_DEFAULT_DID);
 
 	/*
 	 * In pass through mode, AW must be programmed to indicate the largest
 	 * AGAW value supported by hardware. And ASR is ignored by hardware.
 	 */
-	context_set_address_width(context, iommu->msagaw);
-	context_set_translation_type(context, CONTEXT_TT_PASS_THROUGH);
-	context_set_fault_enable(context);
-	context_set_present(context);
+	context_set_address_width(&new, iommu->msagaw);
+	context_set_translation_type(&new, CONTEXT_TT_PASS_THROUGH);
+	context_set_fault_enable(&new);
+	context_set_present(&new);
+	intel_iommu_atomic128_set(&context->val128, new.val128);
 	if (!ecap_coherent(iommu->ecap))
 		clflush_cache_range(context, sizeof(*context));
 	context_present_cache_flush(iommu, FLPT_DEFAULT_DID, bus, devfn);
diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c
index 3e2255057079..298a39183996 100644
--- a/drivers/iommu/intel/pasid.c
+++ b/drivers/iommu/intel/pasid.c
@@ -978,23 +978,23 @@ static int context_entry_set_pasid_table(struct context_entry *context,
 	struct device_domain_info *info = dev_iommu_priv_get(dev);
 	struct pasid_table *table = info->pasid_table;
 	struct intel_iommu *iommu = info->iommu;
+	struct context_entry new = {0};
 	unsigned long pds;
 
-	context_clear_entry(context);
-
 	pds = context_get_sm_pds(table);
-	context->lo = (u64)virt_to_phys(table->table) | context_pdts(pds);
-	context_set_sm_rid2pasid(context, IOMMU_NO_PASID);
+	new.lo = (u64)virt_to_phys(table->table) | context_pdts(pds);
+	context_set_sm_rid2pasid(&new, IOMMU_NO_PASID);
 
 	if (info->ats_supported)
-		context_set_sm_dte(context);
+		context_set_sm_dte(&new);
 	if (info->pasid_supported)
-		context_set_pasid(context);
+		context_set_pasid(&new);
 	if (info->pri_supported)
-		context_set_sm_pre(context);
+		context_set_sm_pre(&new);
 
-	context_set_fault_enable(context);
-	context_set_present(context);
+	context_set_fault_enable(&new);
+	context_set_present(&new);
+	intel_iommu_atomic128_set(&context->val128, new.val128);
 	__iommu_flush_cache(iommu, context, sizeof(*context));
 
 	return 0;
-- 
2.43.0


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ