lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20240829180726.5022-4-suravee.suthikulpanit@amd.com>
Date: Thu, 29 Aug 2024 18:07:24 +0000
From: Suravee Suthikulpanit <suravee.suthikulpanit@....com>
To: <linux-kernel@...r.kernel.org>, <iommu@...ts.linux.dev>
CC: <joro@...tes.org>, <robin.murphy@....com>, <vasant.hegde@....com>,
	<ubizjak@...il.com>, <jgg@...dia.com>, <jon.grimm@....com>,
	<santosh.shukla@....com>, <pandoh@...gle.com>, <kumaranand@...gle.com>,
	Suravee Suthikulpanit <suravee.suthikulpanit@....com>
Subject: [PATCH v2 3/5] iommu/amd: Introduce helper functions to access and update 256-bit DTE

The current implementation does not follow 128-bit write requirement
to update DTE as specified in the AMD I/O Virtualization Techonology
(IOMMU) Specification.

Therefore, modify the struct dev_table_entry to contain union of u128 data
array, and introduce two helper functions:

  * update_dte256() to update DTE using two 128-bit cmpxchg
    operations to update 256-bit DTE with the modified structure.
    Also use the struct iommu_dev_data.dte_sem to synchronize 256-bit
    data update.

  * get_dte256() to copy 256-bit DTE to the provided structrue.
    Also use the struct iommu_dev_data.dte_sem to synchronize 256-bit
    data access.

Also, update existing code to use the new helper functions in this and
subsequent patches.

Suggested-by: Jason Gunthorpe <jgg@...dia.com>
Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@....com>
---
 drivers/iommu/amd/amd_iommu_types.h |  5 +-
 drivers/iommu/amd/iommu.c           | 81 +++++++++++++++++++++++------
 2 files changed, 70 insertions(+), 16 deletions(-)

diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
index 65f3a073999d..2787d6af5a59 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -884,7 +884,10 @@ extern struct amd_iommu *amd_iommus[MAX_IOMMUS];
  * Structure defining one entry in the device table
  */
 struct dev_table_entry {
-	u64 data[4];
+	union {
+		u64 data[4];
+		u128 data128[2];
+	};
 };
 
 /*
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 994ed02842b9..93bca5c68bca 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -85,6 +85,47 @@ static void set_dte_entry(struct amd_iommu *iommu,
  *
  ****************************************************************************/
 
+static void update_dte256(struct amd_iommu *iommu, struct iommu_dev_data *dev_data,
+			  struct dev_table_entry *new)
+{
+	struct dev_table_entry *dev_table = get_dev_table(iommu);
+	struct dev_table_entry *ptr = &dev_table[dev_data->devid];
+	struct dev_table_entry old;
+	u128 tmp;
+
+	down_write(&dev_data->dte_sem);
+
+	old.data128[0] = ptr->data128[0];
+	old.data128[1] = ptr->data128[1];
+
+	tmp = cmpxchg128(&ptr->data128[1], old.data128[1], new->data128[1]);
+	if (tmp == old.data128[1]) {
+		if (!try_cmpxchg128(&ptr->data128[0], &old.data128[0], new->data128[0])) {
+			/* Restore hi 128-bit */
+			cmpxchg128(&ptr->data128[1], new->data128[1], tmp);
+			pr_err("%s: Failed. devid=%#x, dte=%016llx:%016llx:%016llx:%016llx\n",
+			       __func__, dev_data->devid, new->data[0], new->data[1],
+			       new->data[2], new->data[3]);
+		}
+	}
+
+	up_write(&dev_data->dte_sem);
+}
+
+static void get_dte256(struct amd_iommu *iommu, struct iommu_dev_data *dev_data,
+		      struct dev_table_entry *dte)
+{
+	struct dev_table_entry *ptr;
+	struct dev_table_entry *dev_table = get_dev_table(iommu);
+
+	ptr = &dev_table[dev_data->devid];
+
+	down_read(&dev_data->dte_sem);
+	dte->data128[0] = ptr->data128[0];
+	dte->data128[1] = ptr->data128[1];
+	up_read(&dev_data->dte_sem);
+}
+
 static inline bool pdom_is_v2_pgtbl_mode(struct protection_domain *pdom)
 {
 	return (pdom && (pdom->pd_mode == PD_MODE_V2));
@@ -233,8 +274,9 @@ static struct iommu_dev_data *search_dev_data(struct amd_iommu *iommu, u16 devid
 
 static int clone_alias(struct pci_dev *pdev, u16 alias, void *data)
 {
+	struct dev_table_entry dte;
 	struct amd_iommu *iommu;
-	struct dev_table_entry *dev_table;
+	struct iommu_dev_data *dev_data;
 	u16 devid = pci_dev_id(pdev);
 
 	if (devid == alias)
@@ -244,11 +286,19 @@ static int clone_alias(struct pci_dev *pdev, u16 alias, void *data)
 	if (!iommu)
 		return 0;
 
+	dev_data = dev_iommu_priv_get(&pdev->dev);
+	if (!dev_data)
+		return -EINVAL;
+
+	get_dte256(iommu, dev_data, &dte);
+
+	/* Setup for alias */
+	dev_data = search_dev_data(iommu, alias);
+	if (!dev_data)
+		return -EINVAL;
+
+	update_dte256(iommu, dev_data, &dte);
 	amd_iommu_set_rlookup_table(iommu, alias);
-	dev_table = get_dev_table(iommu);
-	memcpy(dev_table[alias].data,
-	       dev_table[devid].data,
-	       sizeof(dev_table[alias].data));
 
 	return 0;
 }
@@ -584,10 +634,13 @@ static void amd_iommu_uninit_device(struct device *dev)
 static void dump_dte_entry(struct amd_iommu *iommu, u16 devid)
 {
 	int i;
-	struct dev_table_entry *dev_table = get_dev_table(iommu);
+	struct dev_table_entry dte;
+	struct iommu_dev_data *dev_data = find_dev_data(iommu, devid);
+
+	get_dte256(iommu, dev_data, &dte);
 
 	for (i = 0; i < 4; ++i)
-		pr_err("DTE[%d]: %016llx\n", i, dev_table[devid].data[i]);
+		pr_err("DTE[%d]: %016llx\n", i, dte.data[i]);
 }
 
 static void dump_command(unsigned long phys_addr)
@@ -2667,12 +2720,10 @@ static int amd_iommu_set_dirty_tracking(struct iommu_domain *domain,
 					bool enable)
 {
 	struct protection_domain *pdomain = to_pdomain(domain);
-	struct dev_table_entry *dev_table;
 	struct iommu_dev_data *dev_data;
 	bool domain_flush = false;
 	struct amd_iommu *iommu;
 	unsigned long flags;
-	u64 pte_root;
 
 	spin_lock_irqsave(&pdomain->lock, flags);
 	if (!(pdomain->dirty_tracking ^ enable)) {
@@ -2681,16 +2732,16 @@ static int amd_iommu_set_dirty_tracking(struct iommu_domain *domain,
 	}
 
 	list_for_each_entry(dev_data, &pdomain->dev_list, list) {
-		iommu = get_amd_iommu_from_dev_data(dev_data);
+		struct dev_table_entry dte;
 
-		dev_table = get_dev_table(iommu);
-		pte_root = dev_table[dev_data->devid].data[0];
+		iommu = get_amd_iommu_from_dev_data(dev_data);
+		get_dte256(iommu, dev_data, &dte);
 
-		pte_root = (enable ? pte_root | DTE_FLAG_HAD :
-				     pte_root & ~DTE_FLAG_HAD);
+		dte.data[0] = (enable ? dte.data[0] | DTE_FLAG_HAD :
+				     dte.data[0] & ~DTE_FLAG_HAD);
 
 		/* Flush device DTE */
-		dev_table[dev_data->devid].data[0] = pte_root;
+		update_dte256(iommu, dev_data, &dte);
 		device_flush_dte(dev_data);
 		domain_flush = true;
 	}
-- 
2.34.1


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ