lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <2025093023-frantic-sediment-9849@gregkh>
Date: Tue, 30 Sep 2025 17:26:17 +0200
From: Greg Kroah-Hartman <gregkh@...uxfoundation.org>
To: Chris Li <chrisl@...nel.org>
Cc: Bjorn Helgaas <bhelgaas@...gle.com>,
	"Rafael J. Wysocki" <rafael@...nel.org>,
	Danilo Krummrich <dakr@...nel.org>, Len Brown <lenb@...nel.org>,
	Pasha Tatashin <pasha.tatashin@...een.com>,
	linux-kernel@...r.kernel.org, linux-pci@...r.kernel.org,
	linux-acpi@...r.kernel.org, David Matlack <dmatlack@...gle.com>,
	Pasha Tatashin <tatashin@...gle.com>,
	Jason Miu <jasonmiu@...gle.com>, Vipin Sharma <vipinsh@...gle.com>,
	Saeed Mahameed <saeedm@...dia.com>,
	Adithya Jayachandran <ajayachandra@...dia.com>,
	Parav Pandit <parav@...dia.com>, William Tu <witu@...dia.com>,
	Mike Rapoport <rppt@...nel.org>, Jason Gunthorpe <jgg@...pe.ca>,
	Leon Romanovsky <leon@...nel.org>
Subject: Re: [PATCH v2 02/10] PCI/LUO: Create requested liveupdate device list

On Tue, Sep 16, 2025 at 12:45:10AM -0700, Chris Li wrote:
>  #define pr_fmt(fmt) "PCI liveupdate: " fmt
> +#define dev_fmt(fmt) "PCI liveupdate: " fmt

Please no.  Use the default dev_ formatting so that people can correct
track the devices spitting out messages here.

> +#include <linux/types.h>
>  #include <linux/liveupdate.h>
> +#include "pci.h"
>  
>  #define PCI_SUBSYSTEM_NAME "pci"

I still don't know why this is needed, why?

>  
> +static void stack_push_buses(struct list_head *stack, struct list_head *buses)
> +{
> +	struct pci_bus *bus;
> +
> +	list_for_each_entry(bus, buses, node)
> +		list_move_tail(&bus->dev.lu.lu_next, stack);
> +}
> +
> +static void liveupdate_add_dev(struct device *dev, struct list_head *head)
> +{
> +	dev_info(dev, "collect liveupdate device: flags %x\n", dev->lu.flags);

Debugging code can go away please.

> +	list_move_tail(&dev->lu.lu_next, head);
> +}
> +
> +static int collect_bus_devices_reverse(struct pci_bus *bus, struct list_head *head)
> +{
> +	struct pci_dev *pdev;
> +	int count = 0;
> +
> +	list_for_each_entry_reverse(pdev, &bus->devices, bus_list) {

Why are you allowed to walk the pci bus list here?  Shouldn't there be
some type of core function to do that?

And why in reverse?

> +		if (pdev->dev.lu.flags & LU_BUSMASTER &&  pdev->dev.parent)
> +			pdev->dev.parent->lu.flags |= LU_BUSMASTER_BRIDGE;
> +		if (pdev->dev.lu.flags) {
> +			liveupdate_add_dev(&pdev->dev, head);
> +			count++;

No locking?

> +		}
> +	}
> +	return count;

What prevents this value from changing right after you return it?

> +}
> +
> +static int build_liveupdate_devices(struct list_head *head)
> +{
> +	LIST_HEAD(bus_stack);
> +	int count = 0;
> +
> +	stack_push_buses(&bus_stack, &pci_root_buses);
> +
> +	while (!list_empty(&bus_stack)) {
> +		struct device *busdev;
> +		struct pci_bus *bus;
> +
> +		busdev = list_last_entry(&bus_stack, struct device, lu.lu_next);
> +		bus = to_pci_bus(busdev);
> +		if (!busdev->lu.visited && !list_empty(&bus->children)) {
> +			stack_push_buses(&bus_stack, &bus->children);
> +			busdev->lu.visited = 1;
> +			continue;
> +		}
> +
> +		count += collect_bus_devices_reverse(bus, head);
> +		busdev->lu.visited = 0;
> +		list_del_init(&busdev->lu.lu_next);
> +	}
> +	return count;

A comment here about what you are trying to do with walking the list of
devices.  Somehow.  Are you sure that's right?  It feels backwards, and
the lack of any locking makes me very nervous.  How is this integrating
into the normal driver model lists?

> +}
> +
> +static void cleanup_liveupdate_devices(struct list_head *head)
> +{
> +	struct device *d, *n;
> +
> +	list_for_each_entry_safe(d, n, head, lu.lu_next) {
> +		d->lu.flags &= ~LU_DEPENDED;
> +		list_del_init(&d->lu.lu_next);
> +	}
> +}

What does "cleanup" mean?

> +
>  static int pci_liveupdate_prepare(void *arg, u64 *data)
>  {
> +	LIST_HEAD(requested_devices);
> +
>  	pr_info("prepare data[%llx]\n", *data);

Addresses written to the kernel log?

> +
> +	pci_lock_rescan_remove();
> +	down_write(&pci_bus_sem);
> +
> +	build_liveupdate_devices(&requested_devices);

Ah, you lock here.  Document the heck out of this and put the proper
build macros in there so we know what is going on.

> +	cleanup_liveupdate_devices(&requested_devices);
> +
> +	up_write(&pci_bus_sem);

Why is it a write?  You aren't modifying the list, are you?

> +	pci_unlock_rescan_remove();
>  	return 0;
>  }
>  
> diff --git a/drivers/pci/pcie/portdrv.c b/drivers/pci/pcie/portdrv.c
> index e8318fd5f6ed537a1b236a3a0f054161d5710abd..0e9ef387182856771d857181d88f376632b46f0d 100644
> --- a/drivers/pci/pcie/portdrv.c
> +++ b/drivers/pci/pcie/portdrv.c
> @@ -304,6 +304,7 @@ static int pcie_device_init(struct pci_dev *pdev, int service, int irq)
>  	device = &pcie->device;
>  	device->bus = &pcie_port_bus_type;
>  	device->release = release_pcie_device;	/* callback to free pcie dev */
> +	dev_liveupdate_init(device);

Why here?

>  	dev_set_name(device, "%s:pcie%03x",
>  		     pci_name(pdev),
>  		     get_descriptor_id(pci_pcie_type(pdev), service));
> diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
> index 4b8693ec9e4c67fc1655e0057b3b96b4098e6630..dddd7ebc03d1a6e6ee456e0bf02ab9833a819509 100644
> --- a/drivers/pci/probe.c
> +++ b/drivers/pci/probe.c
> @@ -614,6 +614,7 @@ static struct pci_bus *pci_alloc_bus(struct pci_bus *parent)
>  	INIT_LIST_HEAD(&b->devices);
>  	INIT_LIST_HEAD(&b->slots);
>  	INIT_LIST_HEAD(&b->resources);
> +	dev_liveupdate_init(&b->dev);

Same, why here?  Shouldn't the driver core be doing this all for you
automatically?  Are you going to make each bus do this manually?

>  	b->max_bus_speed = PCI_SPEED_UNKNOWN;
>  	b->cur_bus_speed = PCI_SPEED_UNKNOWN;
>  #ifdef CONFIG_PCI_DOMAINS_GENERIC
> @@ -1985,6 +1986,7 @@ int pci_setup_device(struct pci_dev *dev)
>  	dev->sysdata = dev->bus->sysdata;
>  	dev->dev.parent = dev->bus->bridge;
>  	dev->dev.bus = &pci_bus_type;
> +	dev_liveupdate_init(&dev->dev);

Looks like you are :(

Do it in one place please.

>  	dev->hdr_type = hdr_type & 0x7f;
>  	dev->multifunction = !!(hdr_type & 0x80);
>  	dev->error_state = pci_channel_io_normal;
> @@ -3184,7 +3186,7 @@ struct pci_bus *pci_create_root_bus(struct device *parent, int bus,
>  		return NULL;
>  
>  	bridge->dev.parent = parent;
> -
> +	dev_liveupdate_init(&bridge->dev);

Again, one place.

>  	list_splice_init(resources, &bridge->windows);
>  	bridge->sysdata = sysdata;
>  	bridge->busnr = bus;
> diff --git a/include/linux/dev_liveupdate.h b/include/linux/dev_liveupdate.h
> new file mode 100644
> index 0000000000000000000000000000000000000000..72297cba08a999e89f7bc0997dabdbe14e0aa12c
> --- /dev/null
> +++ b/include/linux/dev_liveupdate.h
> @@ -0,0 +1,44 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +
> +/*
> + * Copyright (c) 2025, Google LLC.
> + * Pasha Tatashin <pasha.tatashin@...een.com>
> + * Chris Li <chrisl@...nel.org>
> + */
> +#ifndef _LINUX_DEV_LIVEUPDATE_H
> +#define _LINUX_DEV_LIVEUPDATE_H
> +
> +#include <linux/liveupdate.h>
> +
> +#ifdef CONFIG_LIVEUPDATE
> +
> +enum liveupdate_flag {
> +	LU_BUSMASTER = 1 << 0,
> +	LU_BUSMASTER_BRIDGE = 2 << 0,

BIT() please.

> +};
> +
> +#define	LU_REQUESTED	(LU_BUSMASTER)
> +#define	LU_DEPENDED	(LU_BUSMASTER_BRIDGE)

Why 2 names for the same thing?

> +
> +/**
> + * struct dev_liveupdate - Device state for live update operations
> + * @lu_next:	List head for linking the device into live update
> + *		related lists (e.g., a list of devices participating
> + *		in a live update sequence).
> + * @flags:	Indicate what liveupdate feature does the device
> + *		participtate.
> + * @visited:	Only used by the bus devices when travese the PCI buses
> + *		to build the liveupdate devices list. Set if the child
> + *		buses have been pushed into the pending stack.
> + *
> + * This structure holds the state information required for performing
> + * live update operations on a device. It is embedded within a struct device.
> + */
> +struct dev_liveupdate {
> +	struct list_head lu_next;

Another list?

> +	enum liveupdate_flag flags;
> +	bool visited:1;

You shouldn't need this, you "know" you only touch one device at a time
when walking a bus, don't try to manually keep track of it on your own.

And again, why is the pci core doing this, the driver core should be
doing all of this, PLEASE do not bury driver-model-core-changes down in
a "PCI" patch.  That will make the driver core maintainers very grumpy
when they run across stuff like this (as it did here...)

> +};
> +
> +#endif /* CONFIG_LIVEUPDATE */
> +#endif /* _LINUX_DEV_LIVEUPDATE_H */
> diff --git a/include/linux/device.h b/include/linux/device.h
> index 4940db137fffff4ceacf819b32433a0f4898b125..e0b35c723239f1254a3b6152f433e0412cd3fb34 100644
> --- a/include/linux/device.h
> +++ b/include/linux/device.h
> @@ -21,6 +21,7 @@
>  #include <linux/lockdep.h>
>  #include <linux/compiler.h>
>  #include <linux/types.h>
> +#include <linux/dev_liveupdate.h>

Look, driver core changes.  Please do this all in stuff that is NOT for
just PCI.


>  #include <linux/mutex.h>
>  #include <linux/pm.h>
>  #include <linux/atomic.h>
> @@ -508,6 +509,7 @@ struct device_physical_location {
>   * @pm_domain:	Provide callbacks that are executed during system suspend,
>   * 		hibernation, system resume and during runtime PM transitions
>   * 		along with subsystem-level and driver-level callbacks.
> + * @lu:		Live update state.

You have more letters, please use them.  "lu" is too short.

>   * @em_pd:	device's energy model performance domain
>   * @pins:	For device pin management.
>   *		See Documentation/driver-api/pin-control.rst for details.
> @@ -603,6 +605,10 @@ struct device {
>  	struct dev_pm_info	power;
>  	struct dev_pm_domain	*pm_domain;
>  
> +#ifdef CONFIG_LIVEUPDATE
> +	struct dev_liveupdate	lu;
> +#endif

Why not a pointer?

> +
>  #ifdef CONFIG_ENERGY_MODEL
>  	struct em_perf_domain	*em_pd;
>  #endif
> @@ -1168,4 +1174,13 @@ void device_link_wait_removal(void);
>  #define MODULE_ALIAS_CHARDEV_MAJOR(major) \
>  	MODULE_ALIAS("char-major-" __stringify(major) "-*")
>  
> +#ifdef CONFIG_LIVEUPDATE
> +static inline void dev_liveupdate_init(struct device *dev)
> +{
> +	INIT_LIST_HEAD(&dev->lu.lu_next);

Why does this have to be in device.h?  The driver core should do this
for you (as I say above).

thanks,

greg k-h

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ