[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20251202230303.1017519-7-skhawaja@google.com>
Date: Tue, 2 Dec 2025 23:02:36 +0000
From: Samiullah Khawaja <skhawaja@...gle.com>
To: David Woodhouse <dwmw2@...radead.org>, Lu Baolu <baolu.lu@...ux.intel.com>,
Joerg Roedel <joro@...tes.org>, Will Deacon <will@...nel.org>,
Pasha Tatashin <pasha.tatashin@...een.com>, Jason Gunthorpe <jgg@...pe.ca>, iommu@...ts.linux.dev
Cc: YiFei Zhu <zhuyifei@...gle.com>, Samiullah Khawaja <skhawaja@...gle.com>,
Robin Murphy <robin.murphy@....com>, Pratyush Yadav <pratyush@...nel.org>,
Kevin Tian <kevin.tian@...el.com>, Alex Williamson <alex@...zbot.org>, linux-kernel@...r.kernel.org,
Saeed Mahameed <saeedm@...dia.com>, Adithya Jayachandran <ajayachandra@...dia.com>,
Parav Pandit <parav@...dia.com>, Leon Romanovsky <leonro@...dia.com>, William Tu <witu@...dia.com>,
Vipin Sharma <vipinsh@...gle.com>, dmatlack@...gle.com, Chris Li <chrisl@...nel.org>,
praan@...gle.com
Subject: [RFC PATCH v2 06/32] iommufd-lu: Persist iommu hardware pagetables
for live update
From: YiFei Zhu <zhuyifei@...gle.com>
The caller is expected to mark each HWPT to be preserved with an ioctl
call, with a token that will be used in restore. At preserve time, each
HWPT's domain is then called with iommu_domain_preserve to preserve the
iommu domain.
On restore, each preserved HWPT is expected to be restored with another
ioctl call. This HWPT will be recreated without a parent IOAS, and its
domain recreated with iommu_domain_restore. The caller is expected to
later swap the old restored attachments with newly created HWPTs through
normal means such as VFIO_DEVICE_ATTACH_IOMMUFD_PT.
Signed-off-by: YiFei Zhu <zhuyifei@...gle.com>
Signed-off-by: Samiullah Khawaja <skhawaja@...gle.com>
---
drivers/iommu/iommufd/iommufd_private.h | 6 +-
drivers/iommu/iommufd/liveupdate.c | 161 +++++++++++++++++++++++-
drivers/iommu/iommufd/main.c | 19 +++
include/linux/kho/abi/iommufd.h | 8 ++
4 files changed, 189 insertions(+), 5 deletions(-)
diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index 54c7c9888de3..15afff6ba0ea 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -726,9 +726,13 @@ iommufd_get_vdevice(struct iommufd_ctx *ictx, u32 id)
int iommufd_liveupdate_register_lufs(void);
int iommufd_liveupdate_unregister_lufs(void);
-
int iommufd_hwpt_lu_set_preserved(struct iommufd_ucmd *ucmd);
int iommufd_hwpt_lu_restore(struct iommufd_ucmd *ucmd);
+
+/*
+ * TODO: placeholder stubs. As written here, restore and preserve always
+ * fail with -EOPNOTSUPP and no domain is ever reported as having
+ * attachments; the macro arguments are not evaluated.
+ */
+#define iommu_domain_restore(x) ERR_PTR(-EOPNOTSUPP)
+#define iommu_domain_preserve(x, y) (-EOPNOTSUPP)
+#define iommu_domain_has_attachments(x) (false)
#else
static inline int iommufd_liveupdate_register_lufs(void)
{
diff --git a/drivers/iommu/iommufd/liveupdate.c b/drivers/iommu/iommufd/liveupdate.c
index 83d1b888d914..42b380229c57 100644
--- a/drivers/iommu/iommufd/liveupdate.c
+++ b/drivers/iommu/iommufd/liveupdate.c
@@ -9,6 +9,7 @@
#include <linux/kho/abi/iommufd.h>
#include <linux/liveupdate.h>
#include <linux/mm.h>
+#include <linux/pci.h>
#include "iommufd_private.h"
@@ -53,6 +54,82 @@ int iommufd_hwpt_lu_set_preserved(struct iommufd_ucmd *ucmd)
return rc;
}
+/*
+ * iommufd_save_hwpts - count, and optionally serialize, the preserved HWPTs
+ * @ictx: iommufd context whose object xarray is walked
+ * @iommufd_lu: serialization buffer, or NULL to only count
+ *
+ * Walks @ictx->objects for IOMMUFD_OBJ_HWPT_PAGING objects that have
+ * lu_preserved set. With @iommufd_lu == NULL this is a pure sizing pass.
+ * Otherwise @iommufd_lu->nr_hwpts must already hold the count obtained from
+ * the sizing pass: each HWPT's token is recorded in @iommufd_lu->hwpts[] and
+ * every domain is then handed to iommu_domain_preserve().
+ *
+ * Return: the number of preserved HWPTs on success, -ENOMEM if the scratch
+ * array cannot be allocated, -EINVAL if a preserved HWPT has no domain,
+ * -EFAULT if the count differs from @iommufd_lu->nr_hwpts, or the error
+ * returned by iommu_domain_preserve().
+ */
+static int iommufd_save_hwpts(struct iommufd_ctx *ictx,
+			      struct iommufd_lu *iommufd_lu)
+{
+	struct iommufd_hwpt_paging *hwpt, **hwpts = NULL;
+	struct iommufd_hwpt_lu *hwpt_lu;
+	struct iommufd_object *obj;
+	unsigned int nr_hwpts = 0;
+	unsigned long index;
+	unsigned int i;
+	int rc = 0;
+
+	if (iommufd_lu) {
+		hwpts = kcalloc(iommufd_lu->nr_hwpts, sizeof(*hwpts),
+				GFP_KERNEL);
+		if (!hwpts)
+			return -ENOMEM;
+	}
+
+	xa_lock(&ictx->objects);
+	xa_for_each(&ictx->objects, index, obj) {
+		if (obj->type != IOMMUFD_OBJ_HWPT_PAGING)
+			continue;
+
+		hwpt = container_of(obj, struct iommufd_hwpt_paging, common.obj);
+		if (!hwpt->lu_preserved)
+			continue;
+
+		/*
+		 * TODO: The HWPT should be made immutable, and cannot be
+		 * destroyed
+		 */
+
+		if (!hwpt->common.domain) {
+			rc = -EINVAL;
+			xa_unlock(&ictx->objects);
+			goto out;
+		}
+
+		/*
+		 * Both arrays were sized from iommufd_lu->nr_hwpts. If more
+		 * HWPTs are found now than the sizing pass counted, keep
+		 * counting but never write past the end; the mismatch check
+		 * after the walk turns this into an error.
+		 */
+		if (iommufd_lu && nr_hwpts < iommufd_lu->nr_hwpts) {
+			hwpts[nr_hwpts] = hwpt;
+			hwpt_lu = &iommufd_lu->hwpts[nr_hwpts];
+
+			hwpt_lu->token = hwpt->lu_token;
+			hwpt_lu->reclaimed = false;
+		}
+
+		nr_hwpts++;
+	}
+	xa_unlock(&ictx->objects);
+
+	if (WARN_ON(iommufd_lu && iommufd_lu->nr_hwpts != nr_hwpts)) {
+		rc = -EFAULT;
+		goto out;
+	}
+
+	if (iommufd_lu) {
+		/*
+		 * iommu_domain_preserve may sleep and must be called
+		 * outside of xa_lock
+		 */
+		for (i = 0; i < nr_hwpts; i++) {
+			hwpt = hwpts[i];
+			hwpt_lu = &iommufd_lu->hwpts[i];
+
+			rc = iommu_domain_preserve(hwpt->common.domain,
+						   &hwpt_lu->domain_data);
+			/* Only bail out on failure; keep going otherwise */
+			if (rc < 0)
+				goto out;
+		}
+	}
+
+	rc = nr_hwpts;
+
+out:
+	kfree(hwpts);
+	return rc;
+}
+
+
static int iommufd_liveupdate_preserve(struct liveupdate_file_op_args *args)
{
struct iommufd_ctx *ictx = iommufd_ctx_from_file(args->file);
@@ -64,7 +141,11 @@ static int iommufd_liveupdate_preserve(struct liveupdate_file_op_args *args)
if (IS_ERR(ictx))
return PTR_ERR(ictx);
- serial_size = sizeof(*iommufd_lu);
+ rc = iommufd_save_hwpts(ictx, NULL);
+ if (rc < 0)
+ goto err_ctx_put;
+
+ serial_size = struct_size(iommufd_lu, hwpts, rc);
mem = kho_alloc_preserve(serial_size);
if (!mem) {
@@ -73,11 +154,17 @@ static int iommufd_liveupdate_preserve(struct liveupdate_file_op_args *args)
}
iommufd_lu = mem;
+ iommufd_lu->nr_hwpts = rc;
+ rc = iommufd_save_hwpts(ictx, iommufd_lu);
+ if (rc < 0)
+ goto err_free;
args->serialized_data = virt_to_phys(iommufd_lu);
iommufd_ctx_put(ictx);
return 0;
+err_free:
+ kho_unpreserve_free(mem);
err_ctx_put:
iommufd_ctx_put(ictx);
return rc;
@@ -92,10 +179,31 @@ static int iommufd_liveupdate_freeze(struct liveupdate_file_op_args *args)
+/*
+ * Unpreserve callback for the iommufd context behind args->file.
+ *
+ * Walks the context's objects under xa_lock looking at paging HWPTs that
+ * have lu_preserved set; the per-HWPT work is still TODO, so the walk has
+ * no effect yet. Afterwards the serialized state referenced by
+ * args->serialized_data is released with kho_unpreserve_free().
+ */
static void iommufd_liveupdate_unpreserve(struct liveupdate_file_op_args *args)
{
struct iommufd_ctx *ictx = iommufd_ctx_from_file(args->file);
+ struct iommufd_hwpt_paging *hwpt;
+ struct iommufd_object *obj;
+ unsigned long index;
if (WARN_ON(IS_ERR(ictx)))
return;
+ xa_lock(&ictx->objects);
+ xa_for_each(&ictx->objects, index, obj) {
+ /* Only paging HWPTs that were marked for preservation matter */
+ if (obj->type != IOMMUFD_OBJ_HWPT_PAGING)
+ continue;
+
+ hwpt = container_of(obj, struct iommufd_hwpt_paging, common.obj);
+ if (!hwpt->lu_preserved)
+ continue;
+
+ /* TODO: The HWPT should be made mutable again */
+
+ if (!hwpt->common.domain)
+ continue;
+
+ /* TODO: WARN_ON(iommu_domain_unpreserve(hwpt->common.domain)); */
+ }
+ xa_unlock(&ictx->objects);
+
kho_unpreserve_free(phys_to_virt(args->serialized_data));
iommufd_ctx_put(ictx);
}
@@ -164,7 +272,53 @@ static bool iommufd_liveupdate_can_finish(struct liveupdate_file_op_args *args)
+/*
+ * Handler for the HWPT live-update restore command.
+ *
+ * Searches ictx->lu for an entry that has not been reclaimed yet and whose
+ * token equals cmd->hwpt_token, allocates a new paging HWPT object,
+ * restores its domain from the entry's domain_data with
+ * iommu_domain_restore(), finalizes the object and stores its id in
+ * cmd->pt_id.
+ *
+ * Return: 0 on success; -ENOTTY if ictx->lu is NULL; -ENOENT if no
+ * unreclaimed entry matches the token; otherwise the error from HWPT
+ * allocation or from iommu_domain_restore().
+ */
int iommufd_hwpt_lu_restore(struct iommufd_ucmd *ucmd)
{
- return -ENOTTY;
+ struct iommu_hwpt_lu_restore *cmd = ucmd->cmd;
+ struct iommufd_hwpt_paging *hwpt = NULL;
+ struct iommufd_ctx *ictx = ucmd->ictx;
+ struct iommufd_hwpt_lu *hwpt_lu;
+ struct iommufd_lu *iommufd_lu;
+ struct iommu_domain *domain;
+ unsigned int i;
+ int rc;
+
+ iommufd_lu = ictx->lu;
+ if (!iommufd_lu)
+ return -ENOTTY;
+
+ for (i = 0; i < iommufd_lu->nr_hwpts; i++) {
+ hwpt_lu = &iommufd_lu->hwpts[i];
+
+ /* An entry can be restored at most once */
+ if (hwpt_lu->reclaimed)
+ continue;
+
+ if (hwpt_lu->token == cmd->hwpt_token)
+ goto hwpt_found;
+ }
+
+ return -ENOENT;
+
+hwpt_found:
+ hwpt = _iommufd_hwpt_paging_alloc(ictx);
+ if (IS_ERR(hwpt))
+ return PTR_ERR(hwpt);
+
+ /* a successful iommu_domain_restore marks the point of no return */
+ domain = iommu_domain_restore(hwpt_lu->domain_data);
+ if (IS_ERR(domain)) {
+ rc = PTR_ERR(domain);
+ goto err_destroy;
+ }
+
+ iommufd_hwpt_init_from_domain(&hwpt->common, domain);
+ iommufd_object_finalize(ictx, &hwpt->common.obj);
+
+ hwpt_lu->reclaimed = true;
+ /*
+ * NOTE(review): pt_id is only written into the kernel copy of the
+ * command here; nothing in this function copies it back to
+ * userspace - confirm the caller does.
+ */
+ cmd->pt_id = hwpt->common.obj.id;
+ return 0;
+
+err_destroy:
+ iommufd_object_abort_and_destroy(ictx, &hwpt->common.obj);
+ return rc;
}
static void iommufd_liveupdate_finish(struct liveupdate_file_op_args *args)
@@ -175,9 +329,8 @@ static void iommufd_liveupdate_finish(struct liveupdate_file_op_args *args)
ictx = iommufd_ctx_from_file(args->file);
iommufd_lu = ictx->lu;
ictx->lu = NULL;
- iommufd_ctx_put(ictx);
-
folio_put(virt_to_folio(iommufd_lu));
+ iommufd_ctx_put(ictx);
}
static bool iommufd_liveupdate_can_preserve(struct liveupdate_file_handler *handler,
diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c
index b63f61331cae..a334e3da3f45 100644
--- a/drivers/iommu/iommufd/main.c
+++ b/drivers/iommu/iommufd/main.c
@@ -207,6 +207,8 @@ int iommufd_object_remove(struct iommufd_ctx *ictx,
struct iommufd_object *to_destroy, u32 id,
unsigned int flags)
{
+ struct iommufd_hwpt_paging *hwpt_paging;
+ struct iommu_domain *domain;
struct iommufd_object *obj;
XA_STATE(xas, &ictx->objects, id);
bool zerod_wait_cnt = false;
@@ -250,6 +252,23 @@ int iommufd_object_remove(struct iommufd_ctx *ictx,
goto err_xa;
}
+ if (obj->type == IOMMUFD_OBJ_HWPT_PAGING) {
+ /*
+ * Normally attachments are refcounted, but this is not the case
+ * for liveupdate-restored HWPTs.
+ * Additionally, LUO holds a reference to struct files until
+ * finish, which makes sure HWPTs are no longer attached, so
+ * this code path is not a concern in iommufd_fops_release.
+ */
+ hwpt_paging = container_of(obj, struct iommufd_hwpt_paging,
+ common.obj);
+ domain = hwpt_paging->common.domain;
+ if (domain && iommu_domain_has_attachments(domain)) {
+ ret = -EBUSY;
+ goto err_xa;
+ }
+ }
+
if (!refcount_dec_if_one(&obj->users)) {
ret = -EBUSY;
goto err_xa;
diff --git a/include/linux/kho/abi/iommufd.h b/include/linux/kho/abi/iommufd.h
index 19d6b61ec3c3..f7393ac78aa9 100644
--- a/include/linux/kho/abi/iommufd.h
+++ b/include/linux/kho/abi/iommufd.h
@@ -25,7 +25,15 @@
#define IOMMUFD_LUO_COMPATIBLE "iommufd-v1"
+/*
+ * Serialized record for one preserved HWPT.
+ *
+ * token: copied from the HWPT's lu_token when saving and compared with the
+ * token supplied to the restore command.
+ * domain_data: filled in by iommu_domain_preserve() and later passed to
+ * iommu_domain_restore().
+ * reclaimed: cleared when the record is saved, set once the HWPT has been
+ * restored; reclaimed records are skipped by the restore lookup.
+ *
+ * NOTE(review): bool inside a __packed ABI struct - confirm its size is
+ * stable across the kernels that exchange this data.
+ */
+struct iommufd_hwpt_lu {
+ u32 token;
+ u64 domain_data;
+ bool reclaimed;
+} __packed;
+
+/*
+ * Serialized live-update state of one iommufd context: nr_hwpts records
+ * follow in the hwpts[] flexible array.
+ */
struct iommufd_lu {
+ unsigned int nr_hwpts;
+ struct iommufd_hwpt_lu hwpts[];
};
#endif /* _LINUX_KHO_ABI_IOMMUFD_H */
--
2.52.0.158.g65b55ccf14-goog
Powered by blists - more mailing lists