[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <81f201cc-395a-48d7-a1b0-db8c62b93c9e@redhat.com>
Date: Sun, 28 Dec 2025 22:53:41 -0500
From: Waiman Long <llong@...hat.com>
To: Zhang Qiao <zhangqiao22@...wei.com>,
Frederic Weisbecker <frederic@...nel.org>,
LKML <linux-kernel@...r.kernel.org>
Cc: Michal Koutný <mkoutny@...e.com>,
Andrew Morton <akpm@...ux-foundation.org>,
Bjorn Helgaas <bhelgaas@...gle.com>,
Catalin Marinas <catalin.marinas@....com>,
Chen Ridong <chenridong@...wei.com>, Danilo Krummrich <dakr@...nel.org>,
"David S . Miller" <davem@...emloft.net>, Eric Dumazet
<edumazet@...gle.com>, Gabriele Monaco <gmonaco@...hat.com>,
Greg Kroah-Hartman <gregkh@...uxfoundation.org>,
Ingo Molnar <mingo@...hat.com>, Jakub Kicinski <kuba@...nel.org>,
Jens Axboe <axboe@...nel.dk>, Johannes Weiner <hannes@...xchg.org>,
Lai Jiangshan <jiangshanlai@...il.com>,
Marco Crivellari <marco.crivellari@...e.com>, Michal Hocko
<mhocko@...e.com>, Muchun Song <muchun.song@...ux.dev>,
Paolo Abeni <pabeni@...hat.com>, Peter Zijlstra <peterz@...radead.org>,
Phil Auld <pauld@...hat.com>, "Rafael J . Wysocki" <rafael@...nel.org>,
Roman Gushchin <roman.gushchin@...ux.dev>,
Shakeel Butt <shakeel.butt@...ux.dev>, Simon Horman <horms@...nel.org>,
Tejun Heo <tj@...nel.org>, Thomas Gleixner <tglx@...utronix.de>,
Vlastimil Babka <vbabka@...e.cz>, Will Deacon <will@...nel.org>,
cgroups@...r.kernel.org, linux-arm-kernel@...ts.infradead.org,
linux-block@...r.kernel.org, linux-mm@...ck.org, linux-pci@...r.kernel.org,
netdev@...r.kernel.org
Subject: Re: [PATCH 01/33] PCI: Prepare to protect against concurrent isolated
cpuset change
On 12/28/25 10:23 PM, Zhang Qiao wrote:
> Hi, Weisbecker,
>
> 在 2025/12/24 21:44, Frederic Weisbecker 写道:
>> HK_TYPE_DOMAIN will soon integrate cpuset isolated partitions and
>> therefore be made modifiable at runtime. Synchronize against the cpumask
>> update using RCU.
>>
>> The RCU locked section includes both the housekeeping CPU target
>> election for the PCI probe work and the work enqueue.
>>
>> This way the housekeeping update side will simply need to flush the
>> pending related works after updating the housekeeping mask in order to
>> make sure that no PCI work ever executes on an isolated CPU. This part
>> will be handled in a subsequent patch.
>>
>> Signed-off-by: Frederic Weisbecker <frederic@...nel.org>
>> ---
>> drivers/pci/pci-driver.c | 47 ++++++++++++++++++++++++++++++++--------
>> 1 file changed, 38 insertions(+), 9 deletions(-)
>>
>> diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
>> index 7c2d9d596258..786d6ce40999 100644
>> --- a/drivers/pci/pci-driver.c
>> +++ b/drivers/pci/pci-driver.c
>> @@ -302,9 +302,8 @@ struct drv_dev_and_id {
>> const struct pci_device_id *id;
>> };
>>
>> -static long local_pci_probe(void *_ddi)
>> +static int local_pci_probe(struct drv_dev_and_id *ddi)
>> {
>> - struct drv_dev_and_id *ddi = _ddi;
>> struct pci_dev *pci_dev = ddi->dev;
>> struct pci_driver *pci_drv = ddi->drv;
>> struct device *dev = &pci_dev->dev;
>> @@ -338,6 +337,19 @@ static long local_pci_probe(void *_ddi)
>> return 0;
>> }
>>
>> +struct pci_probe_arg {
>> + struct drv_dev_and_id *ddi;
>> + struct work_struct work;
>> + int ret;
>> +};
>> +
>> +static void local_pci_probe_callback(struct work_struct *work)
>> +{
>> + struct pci_probe_arg *arg = container_of(work, struct pci_probe_arg, work);
>> +
>> + arg->ret = local_pci_probe(arg->ddi);
>> +}
>> +
>> static bool pci_physfn_is_probed(struct pci_dev *dev)
>> {
>> #ifdef CONFIG_PCI_IOV
>> @@ -362,34 +374,51 @@ static int pci_call_probe(struct pci_driver *drv, struct pci_dev *dev,
>> dev->is_probed = 1;
>>
>> cpu_hotplug_disable();
>> -
>> /*
>> * Prevent nesting work_on_cpu() for the case where a Virtual Function
>> * device is probed from work_on_cpu() of the Physical device.
>> */
>> if (node < 0 || node >= MAX_NUMNODES || !node_online(node) ||
>> pci_physfn_is_probed(dev)) {
>> - cpu = nr_cpu_ids;
>> + error = local_pci_probe(&ddi);
>> } else {
>> cpumask_var_t wq_domain_mask;
>> + struct pci_probe_arg arg = { .ddi = &ddi };
>> +
>> + INIT_WORK_ONSTACK(&arg.work, local_pci_probe_callback);
>>
>> if (!zalloc_cpumask_var(&wq_domain_mask, GFP_KERNEL)) {
>> error = -ENOMEM;
> If we take the error path here (goto out), destroy_work_on_stack() is never called for arg.work.
>
>
Right. INIT_WORK_ONSTACK() should be called only after the cpumask_var_t
allocation succeeds, so the error path does not leave behind an initialized
on-stack work item that is never destroyed.
Cheers,
Longman
>> goto out;
>> }
>> +
>> + /*
>> + * The target election and the enqueue of the work must be within
>> + * the same RCU read side section so that when the workqueue pool
>> + * is flushed after a housekeeping cpumask update, further readers
>> + * are guaranteed to queue the probing work to the appropriate
>> + * targets.
>> + */
>> + rcu_read_lock();
>> cpumask_and(wq_domain_mask,
>> housekeeping_cpumask(HK_TYPE_WQ),
>> housekeeping_cpumask(HK_TYPE_DOMAIN));
>>
>> cpu = cpumask_any_and(cpumask_of_node(node),
>> wq_domain_mask);
>> + if (cpu < nr_cpu_ids) {
>> + schedule_work_on(cpu, &arg.work);
>> + rcu_read_unlock();
>> + flush_work(&arg.work);
>> + error = arg.ret;
>> + } else {
>> + rcu_read_unlock();
>> + error = local_pci_probe(&ddi);
>> + }
>> +
>> free_cpumask_var(wq_domain_mask);
>> + destroy_work_on_stack(&arg.work);
>> }
>> -
>> - if (cpu < nr_cpu_ids)
>> - error = work_on_cpu(cpu, local_pci_probe, &ddi);
>> - else
>> - error = local_pci_probe(&ddi);
>> out:
>> dev->is_probed = 0;
>> cpu_hotplug_enable();
>>
Powered by blists - more mailing lists