[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <20260122161435.GA1250260@bhelgaas>
Date: Thu, 22 Jan 2026 10:14:35 -0600
From: Bjorn Helgaas <helgaas@...nel.org>
To: Frederic Weisbecker <frederic@...nel.org>
Cc: LKML <linux-kernel@...r.kernel.org>,
Michal Koutný <mkoutny@...e.com>,
Andrew Morton <akpm@...ux-foundation.org>,
Bjorn Helgaas <bhelgaas@...gle.com>,
Catalin Marinas <catalin.marinas@....com>,
Chen Ridong <chenridong@...wei.com>,
Danilo Krummrich <dakr@...nel.org>,
"David S . Miller" <davem@...emloft.net>,
Eric Dumazet <edumazet@...gle.com>,
Gabriele Monaco <gmonaco@...hat.com>,
Greg Kroah-Hartman <gregkh@...uxfoundation.org>,
Ingo Molnar <mingo@...hat.com>, Jakub Kicinski <kuba@...nel.org>,
Jens Axboe <axboe@...nel.dk>, Johannes Weiner <hannes@...xchg.org>,
Lai Jiangshan <jiangshanlai@...il.com>,
Marco Crivellari <marco.crivellari@...e.com>,
Michal Hocko <mhocko@...e.com>, Muchun Song <muchun.song@...ux.dev>,
Paolo Abeni <pabeni@...hat.com>,
Peter Zijlstra <peterz@...radead.org>, Phil Auld <pauld@...hat.com>,
"Rafael J . Wysocki" <rafael@...nel.org>,
Roman Gushchin <roman.gushchin@...ux.dev>,
Shakeel Butt <shakeel.butt@...ux.dev>,
Simon Horman <horms@...nel.org>, Tejun Heo <tj@...nel.org>,
Thomas Gleixner <tglx@...utronix.de>,
Vlastimil Babka <vbabka@...e.cz>, Waiman Long <longman@...hat.com>,
Will Deacon <will@...nel.org>, cgroups@...r.kernel.org,
linux-arm-kernel@...ts.infradead.org, linux-block@...r.kernel.org,
linux-mm@...ck.org, linux-pci@...r.kernel.org,
netdev@...r.kernel.org
Subject: Re: [PATCH 17/33] PCI: Flush PCI probe workqueue on cpuset isolated
partition change
On Thu, Jan 01, 2026 at 11:13:42PM +0100, Frederic Weisbecker wrote:
> The HK_TYPE_DOMAIN housekeeping cpumask is now modifiable at runtime. In
> order to synchronize against PCI probe works and make sure that no
> asynchronous probing is still pending or executing on a newly isolated
> CPU, the housekeeping subsystem must flush the PCI probe works.
>
> However, the PCI probe works can't be flushed easily since they are
> queued to the main per-CPU workqueue pool.
>
> Solve this by creating a PCI probe-specific pool and providing and using
> the appropriate flushing API.
>
> Signed-off-by: Frederic Weisbecker <frederic@...nel.org>
Acked-by: Bjorn Helgaas <bhelgaas@...gle.com>
> ---
> drivers/pci/pci-driver.c | 17 ++++++++++++++++-
> include/linux/pci.h | 3 +++
> kernel/sched/isolation.c | 2 ++
> 3 files changed, 21 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
> index a6111140755c..b902d8adf9a5 100644
> --- a/drivers/pci/pci-driver.c
> +++ b/drivers/pci/pci-driver.c
> @@ -337,6 +337,8 @@ static int local_pci_probe(struct drv_dev_and_id *ddi)
> return 0;
> }
>
> +static struct workqueue_struct *pci_probe_wq;
> +
> struct pci_probe_arg {
> struct drv_dev_and_id *ddi;
> struct work_struct work;
> @@ -407,7 +409,11 @@ static int pci_call_probe(struct pci_driver *drv, struct pci_dev *dev,
> cpu = cpumask_any_and(cpumask_of_node(node),
> wq_domain_mask);
> if (cpu < nr_cpu_ids) {
> - schedule_work_on(cpu, &arg.work);
> + struct workqueue_struct *wq = pci_probe_wq;
> +
> + if (WARN_ON_ONCE(!wq))
> + wq = system_percpu_wq;
> + queue_work_on(cpu, wq, &arg.work);
> rcu_read_unlock();
> flush_work(&arg.work);
> error = arg.ret;
> @@ -425,6 +431,11 @@ static int pci_call_probe(struct pci_driver *drv, struct pci_dev *dev,
> return error;
> }
>
> +void pci_probe_flush_workqueue(void)
> +{
> + flush_workqueue(pci_probe_wq);
> +}
> +
> /**
> * __pci_device_probe - check if a driver wants to claim a specific PCI device
> * @drv: driver to call to check if it wants the PCI device
> @@ -1762,6 +1773,10 @@ static int __init pci_driver_init(void)
> {
> int ret;
>
> + pci_probe_wq = alloc_workqueue("sync_wq", WQ_PERCPU, 0);
> + if (!pci_probe_wq)
> + return -ENOMEM;
> +
> ret = bus_register(&pci_bus_type);
> if (ret)
> return ret;
> diff --git a/include/linux/pci.h b/include/linux/pci.h
> index 864775651c6f..f14f467e50de 100644
> --- a/include/linux/pci.h
> +++ b/include/linux/pci.h
> @@ -1206,6 +1206,7 @@ struct pci_bus *pci_create_root_bus(struct device *parent, int bus,
> struct pci_ops *ops, void *sysdata,
> struct list_head *resources);
> int pci_host_probe(struct pci_host_bridge *bridge);
> +void pci_probe_flush_workqueue(void);
> int pci_bus_insert_busn_res(struct pci_bus *b, int bus, int busmax);
> int pci_bus_update_busn_res_end(struct pci_bus *b, int busmax);
> void pci_bus_release_busn_res(struct pci_bus *b);
> @@ -2079,6 +2080,8 @@ static inline int pci_has_flag(int flag) { return 0; }
> _PCI_NOP_ALL(read, *)
> _PCI_NOP_ALL(write,)
>
> +static inline void pci_probe_flush_workqueue(void) { }
> +
> static inline struct pci_dev *pci_get_device(unsigned int vendor,
> unsigned int device,
> struct pci_dev *from)
> diff --git a/kernel/sched/isolation.c b/kernel/sched/isolation.c
> index ec3f15164fd1..5239f556745d 100644
> --- a/kernel/sched/isolation.c
> +++ b/kernel/sched/isolation.c
> @@ -8,6 +8,7 @@
> *
> */
> #include <linux/sched/isolation.h>
> +#include <linux/pci.h>
> #include "sched.h"
>
> enum hk_flags {
> @@ -142,6 +143,7 @@ int housekeeping_update(struct cpumask *isol_mask)
>
> synchronize_rcu();
>
> + pci_probe_flush_workqueue();
> mem_cgroup_flush_workqueue();
> vmstat_flush_workqueue();
>
> --
> 2.51.1
>
Powered by blists - more mailing lists