lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20251126193608.2678510-3-dmatlack@google.com>
Date: Wed, 26 Nov 2025 19:35:49 +0000
From: David Matlack <dmatlack@...gle.com>
To: Alex Williamson <alex@...zbot.org>
Cc: Adithya Jayachandran <ajayachandra@...dia.com>, Alex Mastro <amastro@...com>, 
	Alistair Popple <apopple@...dia.com>, Andrew Morton <akpm@...ux-foundation.org>, 
	Bjorn Helgaas <bhelgaas@...gle.com>, Chris Li <chrisl@...nel.org>, 
	David Matlack <dmatlack@...gle.com>, David Rientjes <rientjes@...gle.com>, 
	Jacob Pan <jacob.pan@...ux.microsoft.com>, Jason Gunthorpe <jgg@...dia.com>, 
	Jason Gunthorpe <jgg@...pe.ca>, Josh Hilke <jrhilke@...gle.com>, Kevin Tian <kevin.tian@...el.com>, 
	kvm@...r.kernel.org, Leon Romanovsky <leonro@...dia.com>, linux-kernel@...r.kernel.org, 
	linux-kselftest@...r.kernel.org, linux-pci@...r.kernel.org, 
	Lukas Wunner <lukas@...ner.de>, Mike Rapoport <rppt@...nel.org>, Parav Pandit <parav@...dia.com>, 
	Pasha Tatashin <pasha.tatashin@...een.com>, Philipp Stanner <pstanner@...hat.com>, 
	Pratyush Yadav <pratyush@...nel.org>, Saeed Mahameed <saeedm@...dia.com>, 
	Samiullah Khawaja <skhawaja@...gle.com>, Shuah Khan <shuah@...nel.org>, 
	Tomita Moeko <tomitamoeko@...il.com>, Vipin Sharma <vipinsh@...gle.com>, William Tu <witu@...dia.com>, 
	Yi Liu <yi.l.liu@...el.com>, Yunxiang Li <Yunxiang.Li@....com>, 
	Zhu Yanjun <yanjun.zhu@...ux.dev>
Subject: [PATCH 02/21] PCI: Add API to track PCI devices preserved across Live Update

Add an API to enable the PCI subsystem to track all devices that are
preserved across a Live Update, including both incoming devices (passed
from the previous kernel) and outgoing devices (passed to the next
kernel).

Use PCI segment number and BDF to keep track of devices across Live
Update. This means the kernel must keep both identifiers constant across
a Live Update for any preserved device. VFs are not supported for now,
since that requires preserving SR-IOV state on the device to ensure the
same number of VFs appear after kexec and with the same BDFs.

Drivers that preserve devices across Live Update can now register their
struct liveupdate_file_handler with the PCI subsystem so that the PCI
subsystem can allocate and manage File-Lifecycle-Bound (FLB) global data
to track the list of incoming and outgoing preserved devices.

  pci_liveupdate_register_fh(driver_fh)
  pci_liveupdate_unregister_fh(driver_fh)

Drivers can notify the PCI subsystem whenever a device is preserved and
unpreserved with the following APIs:

  pci_liveupdate_outgoing_preserve(pci_dev)
  pci_liveupdate_outgoing_unpreserve(pci_dev)

After a Live Update, the PCI subsystem can fetch its FLB global data
from the previous kernel from the Live Update Orchestrator (LUO) to
determine which devices are preserved. This API is also made available
for drivers to use to check if a device was preserved before userspace
retrieves the file for it.

  pci_liveupdate_incoming_is_preserved(pci_dev)

Once a driver has finished restoring an incoming preserved device, it
can notify the PCI subsystem with the following call:

  pci_liveupdate_incoming_finish(pci_dev)

This will be used in subsequent commits by the vfio-pci driver to
preserve VFIO devices across Live Update.

Signed-off-by: David Matlack <dmatlack@...gle.com>
---
 drivers/pci/Makefile        |   1 +
 drivers/pci/liveupdate.c    | 248 ++++++++++++++++++++++++++++++++++++
 include/linux/kho/abi/pci.h |  53 ++++++++
 include/linux/pci.h         |  38 ++++++
 4 files changed, 340 insertions(+)
 create mode 100644 drivers/pci/liveupdate.c
 create mode 100644 include/linux/kho/abi/pci.h

diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile
index 67647f1880fb..0cb43e10e71d 100644
--- a/drivers/pci/Makefile
+++ b/drivers/pci/Makefile
@@ -16,6 +16,7 @@ obj-$(CONFIG_PROC_FS)		+= proc.o
 obj-$(CONFIG_SYSFS)		+= pci-sysfs.o slot.o
 obj-$(CONFIG_ACPI)		+= pci-acpi.o
 obj-$(CONFIG_GENERIC_PCI_IOMAP) += iomap.o
+obj-$(CONFIG_LIVEUPDATE)	+= liveupdate.o
 endif
 
 obj-$(CONFIG_OF)		+= of.o
diff --git a/drivers/pci/liveupdate.c b/drivers/pci/liveupdate.c
new file mode 100644
index 000000000000..f9bb97f3bada
--- /dev/null
+++ b/drivers/pci/liveupdate.c
@@ -0,0 +1,248 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright (c) 2025, Google LLC.
+ * David Matlack <dmatlack@...gle.com>
+ */
+
+#include <linux/bsearch.h>
+#include <linux/io.h>
+#include <linux/kexec_handover.h>
+#include <linux/kho/abi/pci.h>
+#include <linux/liveupdate.h>
+#include <linux/mutex.h>
+#include <linux/mm.h>
+#include <linux/pci.h>
+#include <linux/sort.h>
+
+static DEFINE_MUTEX(pci_flb_outgoing_lock);
+static DEFINE_MUTEX(pci_flb_incoming_lock);
+
+static int pci_flb_preserve(struct liveupdate_flb_op_args *args)
+{
+	struct pci_dev *dev = NULL;
+	struct folio *folio;
+	unsigned int order;
+	int nr_devices = 0;
+	int ret;
+
+	/*
+	 * Calculate the maximum number of devices based on what's present
+	 * on the system currently (including VFs) to size the folio holding
+	 * struct pci_ser. This is not perfect given devices could be
+	 * hotplugged, but it's also unlikely that all devices in the system are
+	 * going to be preserved anyway.
+	 */
+	for_each_pci_dev(dev) {
+		if (dev->is_virtfn)
+			continue;
+
+		nr_devices += 1 + pci_sriov_get_totalvfs(dev);
+	}
+
+	order = get_order(offsetof(struct pci_ser, devices[nr_devices + 1]));
+
+	folio = folio_alloc(GFP_KERNEL | __GFP_ZERO, order);
+	if (!folio)
+		return -ENOMEM;
+
+	ret = kho_preserve_folio(folio);
+	if (ret) {
+		folio_put(folio);
+		return ret;
+	}
+
+	args->obj = folio_address(folio);
+	args->data = virt_to_phys(args->obj);
+
+	return 0;
+}
+
+static void pci_flb_unpreserve(struct liveupdate_flb_op_args *args)
+{
+	struct pci_ser *ser = args->obj;
+	struct folio *folio = virt_to_folio(ser);
+
+	WARN_ON_ONCE(ser->nr_devices);
+	kho_unpreserve_folio(folio);
+	folio_put(folio);
+}
+
+static int pci_flb_retrieve(struct liveupdate_flb_op_args *args)
+{
+	struct folio *folio;
+
+	folio = kho_restore_folio(args->data);
+	if (!folio)
+		panic("Unable to restore preserved FLB data from KHO (0x%llx)\n", args->data);
+
+	args->obj = folio_address(folio);
+	return 0;
+}
+
+static void pci_flb_finish(struct liveupdate_flb_op_args *args)
+{
+	struct pci_ser *ser = args->obj;
+
+	/*
+	 * Sanity check that all devices have been finished via
+	 * pci_liveupdate_incoming_finish().
+	 */
+	WARN_ON_ONCE(ser->nr_devices);
+	folio_put(virt_to_folio(ser));
+}
+
+static struct liveupdate_flb_ops pci_liveupdate_flb_ops = {
+	.preserve = pci_flb_preserve,
+	.unpreserve = pci_flb_unpreserve,
+	.retrieve = pci_flb_retrieve,
+	.finish = pci_flb_finish,
+	.owner = THIS_MODULE,
+};
+
+static struct liveupdate_flb pci_liveupdate_flb = {
+	.ops = &pci_liveupdate_flb_ops,
+	.compatible = PCI_LUO_FLB_COMPATIBLE,
+};
+
+#define INIT_PCI_DEV_SER(_dev) {		\
+	.domain = pci_domain_nr((_dev)->bus),	\
+	.bdf = pci_dev_id(_dev),		\
+}
+
+static int pci_dev_ser_cmp(const void *__a, const void *__b)
+{
+	const struct pci_dev_ser *a = __a, *b = __b;
+
+	return cmp_int(a->domain << 16 | a->bdf, b->domain << 16 | b->bdf);
+}
+
+static struct pci_dev_ser *pci_ser_find(struct pci_ser *ser, struct pci_dev *dev)
+{
+	const struct pci_dev_ser key = INIT_PCI_DEV_SER(dev);
+
+	return bsearch(&key, ser->devices, ser->nr_devices,
+		       sizeof(key), pci_dev_ser_cmp);
+}
+
+static int pci_ser_delete(struct pci_ser *ser, struct pci_dev *dev)
+{
+	struct pci_dev_ser *dev_ser;
+	int i;
+
+	dev_ser = pci_ser_find(ser, dev);
+	if (!dev_ser)
+		return -ENOENT;
+
+	for (i = dev_ser - ser->devices; i < ser->nr_devices - 1; i++)
+		ser->devices[i] = ser->devices[i + 1];
+
+	ser->nr_devices--;
+	return 0;
+}
+
+static int max_nr_devices(struct pci_ser *ser)
+{
+	u64 size;
+
+	size = folio_size(virt_to_folio(ser));
+	size -= offsetof(struct pci_ser, devices);
+
+	return size / sizeof(struct pci_dev_ser);
+}
+
+int pci_liveupdate_outgoing_preserve(struct pci_dev *dev)
+{
+	struct pci_dev_ser new = INIT_PCI_DEV_SER(dev);
+	struct pci_ser *ser;
+	int i, ret;
+
+	/* VFs are not supported yet due to BDF instability across kexec */
+	if (dev->is_virtfn)
+		return -EINVAL;
+
+	guard(mutex)(&pci_flb_outgoing_lock);
+
+	ret = liveupdate_flb_get_outgoing(&pci_liveupdate_flb, (void **)&ser);
+	if (ret)
+		return ret;
+
+	if (ser->nr_devices == max_nr_devices(ser))
+		return -E2BIG;
+
+	for (i = ser->nr_devices; i > 0; i--) {
+		struct pci_dev_ser *prev = &ser->devices[i - 1];
+		int cmp = pci_dev_ser_cmp(&new, prev);
+
+		/* This device is already preserved. */
+		if (cmp == 0)
+			return 0;
+
+		if (cmp > 0)
+			break;
+
+		ser->devices[i] = *prev;
+	}
+
+	ser->devices[i] = new;
+	ser->nr_devices++;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(pci_liveupdate_outgoing_preserve);
+
+void pci_liveupdate_outgoing_unpreserve(struct pci_dev *dev)
+{
+	struct pci_ser *ser;
+	int ret;
+
+	guard(mutex)(&pci_flb_outgoing_lock);
+
+	ret = liveupdate_flb_get_outgoing(&pci_liveupdate_flb, (void **)&ser);
+	if (WARN_ON_ONCE(ret))
+		return;
+
+	WARN_ON_ONCE(pci_ser_delete(ser, dev));
+}
+EXPORT_SYMBOL_GPL(pci_liveupdate_outgoing_unpreserve);
+
+bool pci_liveupdate_incoming_is_preserved(struct pci_dev *dev)
+{
+	struct pci_ser *ser;
+	int ret;
+
+	guard(mutex)(&pci_flb_incoming_lock);
+
+	ret = liveupdate_flb_get_incoming(&pci_liveupdate_flb, (void **)&ser);
+	if (ret)
+		return false;
+
+	return pci_ser_find(ser, dev);
+}
+EXPORT_SYMBOL_GPL(pci_liveupdate_incoming_is_preserved);
+
+void pci_liveupdate_incoming_finish(struct pci_dev *dev)
+{
+	struct pci_ser *ser;
+	int ret;
+
+	guard(mutex)(&pci_flb_incoming_lock);
+
+	ret = liveupdate_flb_get_incoming(&pci_liveupdate_flb, (void **)&ser);
+	if (WARN_ON_ONCE(ret))
+		return;
+
+	WARN_ON_ONCE(pci_ser_delete(ser, dev));
+}
+EXPORT_SYMBOL_GPL(pci_liveupdate_incoming_finish);
+
+int pci_liveupdate_register_fh(struct liveupdate_file_handler *fh)
+{
+	return liveupdate_register_flb(fh, &pci_liveupdate_flb);
+}
+EXPORT_SYMBOL_GPL(pci_liveupdate_register_fh);
+
+int pci_liveupdate_unregister_fh(struct liveupdate_file_handler *fh)
+{
+	return liveupdate_unregister_flb(fh, &pci_liveupdate_flb);
+}
+EXPORT_SYMBOL_GPL(pci_liveupdate_unregister_fh);
diff --git a/include/linux/kho/abi/pci.h b/include/linux/kho/abi/pci.h
new file mode 100644
index 000000000000..53744b6f191a
--- /dev/null
+++ b/include/linux/kho/abi/pci.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * Copyright (c) 2025, Google LLC.
+ * David Matlack <dmatlack@...gle.com>
+ */
+
+#ifndef _LINUX_KHO_ABI_PCI_H
+#define _LINUX_KHO_ABI_PCI_H
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+
+/**
+ * DOC: PCI File-Lifecycle Bound (FLB) Live Update ABI
+ *
+ * This header defines the ABI for preserving core PCI state across kexec using
+ * Live Update File-Lifecycle Bound (FLB) data.
+ *
+ * This interface is a contract. Any modification to any of the serialization
+ * structs defined here constitutes a breaking change. Such changes require
+ * incrementing the version number in the PCI_LUO_FLB_COMPATIBLE string.
+ */
+
+#define PCI_LUO_FLB_COMPATIBLE "pci-v1"
+
+/**
+ * struct pci_dev_ser - Serialized state about a single PCI device.
+ *
+ * @domain: The device's PCI domain number (segment).
+ * @bdf: The device's PCI bus, device, and function number.
+ */
+struct pci_dev_ser {
+	u16 domain;
+	u16 bdf;
+} __packed;
+
+/**
+ * struct pci_ser - PCI Subsystem Live Update State
+ *
+ * This struct tracks state about all devices that are being preserved across
+ * a Live Update for the next kernel.
+ *
+ * @nr_devices: The number of devices that were preserved.
+ * @devices: Flexible array of pci_dev_ser structs for each device. Guaranteed
+ *           to be sorted ascending by domain and bdf.
+ */
+struct pci_ser {
+	u64 nr_devices;
+	struct pci_dev_ser devices[];
+} __packed;
+
+#endif /* _LINUX_KHO_ABI_PCI_H */
diff --git a/include/linux/pci.h b/include/linux/pci.h
index d1fdf81fbe1e..6a3c2d7e5b82 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -40,6 +40,7 @@
 #include <linux/resource_ext.h>
 #include <linux/msi_api.h>
 #include <uapi/linux/pci.h>
+#include <linux/liveupdate.h>
 
 #include <linux/pci_ids.h>
 
@@ -2795,4 +2796,41 @@ void pci_uevent_ers(struct pci_dev *pdev, enum  pci_ers_result err_type);
 	WARN_ONCE(condition, "%s %s: " fmt, \
 		  dev_driver_string(&(pdev)->dev), pci_name(pdev), ##arg)
 
+#ifdef CONFIG_LIVEUPDATE
+int pci_liveupdate_outgoing_preserve(struct pci_dev *dev);
+void pci_liveupdate_outgoing_unpreserve(struct pci_dev *dev);
+bool pci_liveupdate_incoming_is_preserved(struct pci_dev *dev);
+void pci_liveupdate_incoming_finish(struct pci_dev *dev);
+int pci_liveupdate_register_fh(struct liveupdate_file_handler *fh);
+int pci_liveupdate_unregister_fh(struct liveupdate_file_handler *fh);
+#else /* !CONFIG_LIVEUPDATE */
+static inline int pci_liveupdate_outgoing_preserve(struct pci_dev *dev)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline void pci_liveupdate_outgoing_unpreserve(struct pci_dev *dev)
+{
+}
+
+static inline bool pci_liveupdate_incoming_is_preserved(struct pci_dev *dev)
+{
+	return false;
+}
+
+static inline void pci_liveupdate_incoming_finish(struct pci_dev *dev)
+{
+}
+
+static inline int pci_liveupdate_register_fh(struct liveupdate_file_handler *fh)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int pci_liveupdate_unregister_fh(struct liveupdate_file_handler *fh)
+{
+	return -EOPNOTSUPP;
+}
+#endif /* !CONFIG_LIVEUPDATE */
+
 #endif /* LINUX_PCI_H */
-- 
2.52.0.487.g5c8c507ade-goog


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ