lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <00ad5958-dc5f-4d18-ad24-9de607912bf8@intel.com>
Date: Thu, 29 Jan 2026 09:07:51 -0800
From: "Chang S. Bae" <chang.seok.bae@...el.com>
To: Thomas Gleixner <tglx@...nel.org>, <linux-kernel@...r.kernel.org>
CC: <x86@...nel.org>, <mingo@...hat.com>, <bp@...en8.de>,
	<dave.hansen@...ux.intel.com>, <peterz@...radead.org>, <david.kaplan@....com>
Subject: Re: [PATCH 1/7] stop_machine: Introduce stop_machine_nmi()

On 1/28/2026 12:02 AM, Thomas Gleixner wrote:
> On Sun, Jan 25 2026 at 01:42, Chang S. Bae wrote:
>> +/**
>> + * stop_machine_nmi: freeze the machine and run this function in NMI context
>> + * @fn: the function to run
>> + * @data: the data ptr for the @fn()
>> + * @cpus: the cpus to run the @fn() on (NULL = any online cpu)
> 
> Please format these tabular, use uppercase letters to start the
> explanation, use CPU[s] all over the place and write out words instead
> of using made up abbreviations. This is documentation not twitter.
> 
>   * @fn:		The function to invoke
>   * @data:	The data pointer for @fn()
>   * @cpus:	A cpumask containing the CPUs to run fn() on
> 
> Also this NULL == any online CPU is just made up. What's wrong with
> cpu_online_mask?
> 
>> +
>> +bool noinstr stop_machine_nmi_handler(void);
>> +DECLARE_STATIC_KEY_FALSE(stop_machine_nmi_handler_enable);
>> +static __always_inline bool stop_machine_nmi_handler_enabled(void)
> 
> Can you please separate the declarations from the inline with an empty
> new line? This glued together way to write it is unreadable.

Yes, I fixed them all on my local right now.

> 
>> +{
>> +	return static_branch_unlikely(&stop_machine_nmi_handler_enable);
>> +}
>> +
>>   #else	/* CONFIG_SMP || CONFIG_HOTPLUG_CPU */
>>   
>>   static __always_inline int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data,
>> @@ -186,5 +217,24 @@ stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data,
>>   	return stop_machine(fn, data, cpus);
>>   }
>>   
>> +/* stop_machine_nmi() is only supported in SMP systems. */
>> +static __always_inline int stop_machine_nmi(cpu_stop_fn_t fn, void *data,
>> +					const struct cpumask *cpus)
> 
> Align the second line argument with the first argument above.
> 
> See https://www.kernel.org/doc/html/latest/process/maintainer-tip.html

Sorry for lot of misalignment issues in this change that I missed out.

>> +{
>> +	return -EINVAL;
>> +}
>> +
> 
>> +
>> +void arch_send_self_nmi(void);
>>   #endif	/* _LINUX_STOP_MACHINE */
>> diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
>> index 3fe6b0c99f3d..189b4b108d13 100644
>> --- a/kernel/stop_machine.c
>> +++ b/kernel/stop_machine.c
>> @@ -174,6 +174,8 @@ struct multi_stop_data {
>>   
>>   	enum multi_stop_state	state;
>>   	atomic_t		thread_ack;
>> +
>> +	bool			use_nmi;
>>   };
>>   
>>   static void set_state(struct multi_stop_data *msdata,
>> @@ -197,6 +199,42 @@ notrace void __weak stop_machine_yield(const struct cpumask *cpumask)
>>   	cpu_relax();
>>   }
>>   
>> +struct stop_machine_nmi_ctrl {
>> +	bool nmi_enabled;
>> +	struct multi_stop_data *msdata;
>> +	int err;
> 
> Please align the struct member names tabular. See documentation.

Fixed.

> 
>> +};
>> +
>> +DEFINE_STATIC_KEY_FALSE(stop_machine_nmi_handler_enable);
>> +static DEFINE_PER_CPU(struct stop_machine_nmi_ctrl, stop_machine_nmi_ctrl);
>> +
>> +static void enable_nmi_handler(struct multi_stop_data *msdata)
>> +{
>> +	this_cpu_write(stop_machine_nmi_ctrl.msdata, msdata);
>> +	this_cpu_write(stop_machine_nmi_ctrl.nmi_enabled, true);
>> +}
>> +
>> +void __weak arch_send_self_nmi(void)
>> +{
>> +	/* Arch code must implement this to support stop_machine_nmi() */
> 
> Architecture

Fixed.

> 
>> +}
> 
> Also this weak function is wrong.
> 
> All of this NMI mode needs to be guarded with a config option as it
> otherwise is compiled in unconditionally and any accidental usage on an
> architecture which does not support this will result in a undecodable
> malfunction. There is a world outside of x86.
> 
> With that arch_send_self_nmi() becomes a plain declaration in a header.

I see.

> 
>> +
>> +bool noinstr stop_machine_nmi_handler(void)
>> +{
>> +	struct multi_stop_data *msdata;
>> +	int err;
>> +
>> +	if (!raw_cpu_read(stop_machine_nmi_ctrl.nmi_enabled))
>> +		return false;
>> +
>> +	raw_cpu_write(stop_machine_nmi_ctrl.nmi_enabled, false);
>> +
>> +	msdata = raw_cpu_read(stop_machine_nmi_ctrl.msdata);
>> +	err = msdata->fn(msdata->data);
>> +	raw_cpu_write(stop_machine_nmi_ctrl.err, err);
>> +	return true;
>> +}
>> +
>>   /* This is the cpu_stop function which stops the CPU. */
>>   static int multi_cpu_stop(void *data)
>>   {
>> @@ -234,8 +272,15 @@ static int multi_cpu_stop(void *data)
>>   				hard_irq_disable();
>>   				break;
>>   			case MULTI_STOP_RUN:
>> -				if (is_active)
>> -					err = msdata->fn(msdata->data);
>> +				if (is_active) {
>> +					if (msdata->use_nmi) {
>> +						enable_nmi_handler(msdata);
>> +						arch_send_self_nmi();
>> +						err = raw_cpu_read(stop_machine_nmi_ctrl.err);
>> +					} else {
>> +						err = msdata->fn(msdata->data);
>> +					}
> 
> And this wants to become
> 
>      if (IS_ENABLED(CONFIG_STOMP_MACHINE_NMI) && msdata->use_nmi)
>      	err = stop_this_cpu_nmi(msdata);
>      else
> 	err = msdata->fn(msdata->data);

Although that config option is very clear and makes tons of sense, the 
latter reads like a (silent) fallback path for a stop_machine_nmi() 
invocation with CONFIG_STOMP_MACHINE_NMI=n.

Maybe this might be clear to reject the NMI option right away with 
something like:

stop_machine_cpuslocked_nmi(...)
{
	if (!IS_ENABLED(CONFIG_STOMP_MACHINE_NMI))
		return -EOPNOTSUPP;
	...
};

>>   
>> -int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data,
>> -			    const struct cpumask *cpus)
>> +static int __stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data,
>> +			    const struct cpumask *cpus, bool use_nmi)
> 
> The argument alignment was correct before....

Sigh... fixed, again.

>> +int stop_machine_nmi(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus)
>> +{
>> +	int ret;
>> +
>> +	cpus_read_lock();
>> +	ret = stop_machine_cpuslocked_nmi(fn, data, cpus);
>> +	cpus_read_unlock();
>> +
>> +	return ret;
>> +}
>> +EXPORT_SYMBOL_GPL(stop_machine_nmi);
> 
> Why needs this to be exported? No module has any business with stomp
> machine.

Not at all. Removed.

I really appreciate your time and effort for the review!


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ