lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <87o6me9nd0.ffs@tglx>
Date: Wed, 28 Jan 2026 09:02:03 +0100
From: Thomas Gleixner <tglx@...nel.org>
To: "Chang S. Bae" <chang.seok.bae@...el.com>, linux-kernel@...r.kernel.org
Cc: x86@...nel.org, mingo@...hat.com, bp@...en8.de,
 dave.hansen@...ux.intel.com, peterz@...radead.org, david.kaplan@....com,
 chang.seok.bae@...el.com
Subject: Re: [PATCH 1/7] stop_machine: Introduce stop_machine_nmi()

On Sun, Jan 25 2026 at 01:42, Chang S. Bae wrote:
> +/**
> + * stop_machine_nmi: freeze the machine and run this function in NMI context
> + * @fn: the function to run
> + * @data: the data ptr for the @fn()
> + * @cpus: the cpus to run the @fn() on (NULL = any online cpu)

Please format these tabular, use uppercase letters to start the
explanation, use CPU[s] all over the place and write out words instead
of using made up abbreviations. This is documentation not twitter.

 * @fn:		The function to invoke
 * @data:	The data pointer for @fn()
 * @cpus:	A cpumask containing the CPUs to run fn() on

Also this NULL == any online CPU is just made up. What's wrong with
cpu_online_mask?

> +
> +bool noinstr stop_machine_nmi_handler(void);
> +DECLARE_STATIC_KEY_FALSE(stop_machine_nmi_handler_enable);
> +static __always_inline bool stop_machine_nmi_handler_enabled(void)

Can you please separate the declarations from the inline with an empty
new line? This glued together way to write it is unreadable.

> +{
> +	return static_branch_unlikely(&stop_machine_nmi_handler_enable);
> +}
> +
>  #else	/* CONFIG_SMP || CONFIG_HOTPLUG_CPU */
>  
>  static __always_inline int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data,
> @@ -186,5 +217,24 @@ stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data,
>  	return stop_machine(fn, data, cpus);
>  }
>  
> +/* stop_machine_nmi() is only supported in SMP systems. */
> +static __always_inline int stop_machine_nmi(cpu_stop_fn_t fn, void *data,
> +					const struct cpumask *cpus)

Align the second line argument with the first argument above.

See https://www.kernel.org/doc/html/latest/process/maintainer-tip.html

> +{
> +	return -EINVAL;
> +}
> +

> +
> +void arch_send_self_nmi(void);
>  #endif	/* _LINUX_STOP_MACHINE */
> diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
> index 3fe6b0c99f3d..189b4b108d13 100644
> --- a/kernel/stop_machine.c
> +++ b/kernel/stop_machine.c
> @@ -174,6 +174,8 @@ struct multi_stop_data {
>  
>  	enum multi_stop_state	state;
>  	atomic_t		thread_ack;
> +
> +	bool			use_nmi;
>  };
>  
>  static void set_state(struct multi_stop_data *msdata,
> @@ -197,6 +199,42 @@ notrace void __weak stop_machine_yield(const struct cpumask *cpumask)
>  	cpu_relax();
>  }
>  
> +struct stop_machine_nmi_ctrl {
> +	bool nmi_enabled;
> +	struct multi_stop_data *msdata;
> +	int err;

Please align the struct member names tabular. See documentation.

> +};
> +
> +DEFINE_STATIC_KEY_FALSE(stop_machine_nmi_handler_enable);
> +static DEFINE_PER_CPU(struct stop_machine_nmi_ctrl, stop_machine_nmi_ctrl);
> +
> +static void enable_nmi_handler(struct multi_stop_data *msdata)
> +{
> +	this_cpu_write(stop_machine_nmi_ctrl.msdata, msdata);
> +	this_cpu_write(stop_machine_nmi_ctrl.nmi_enabled, true);
> +}
> +
> +void __weak arch_send_self_nmi(void)
> +{
> +	/* Arch code must implement this to support stop_machine_nmi() */

Architecture

> +}

Also this weak function is wrong.

All of this NMI mode needs to be guarded with a config option as it
otherwise is compiled in unconditionally and any accidental usage on an
architecture which does not support this will result in a undecodable
malfunction. There is a world outside of x86.

With that arch_send_self_nmi() becomes a plain declaration in a header.

> +
> +bool noinstr stop_machine_nmi_handler(void)
> +{
> +	struct multi_stop_data *msdata;
> +	int err;
> +
> +	if (!raw_cpu_read(stop_machine_nmi_ctrl.nmi_enabled))
> +		return false;
> +
> +	raw_cpu_write(stop_machine_nmi_ctrl.nmi_enabled, false);
> +
> +	msdata = raw_cpu_read(stop_machine_nmi_ctrl.msdata);
> +	err = msdata->fn(msdata->data);
> +	raw_cpu_write(stop_machine_nmi_ctrl.err, err);
> +	return true;
> +}
> +
>  /* This is the cpu_stop function which stops the CPU. */
>  static int multi_cpu_stop(void *data)
>  {
> @@ -234,8 +272,15 @@ static int multi_cpu_stop(void *data)
>  				hard_irq_disable();
>  				break;
>  			case MULTI_STOP_RUN:
> -				if (is_active)
> -					err = msdata->fn(msdata->data);
> +				if (is_active) {
> +					if (msdata->use_nmi) {
> +						enable_nmi_handler(msdata);
> +						arch_send_self_nmi();
> +						err = raw_cpu_read(stop_machine_nmi_ctrl.err);
> +					} else {
> +						err = msdata->fn(msdata->data);
> +					}

And this wants to become

    if (IS_ENABLED(CONFIG_STOMP_MACHINE_NMI) && msdata->use_nmi)
    	err = stop_this_cpu_nmi(msdata);
    else
	err = msdata->fn(msdata->data);

> +				}
>  				break;
>  			default:
>  				break;
> @@ -584,14 +629,15 @@ static int __init cpu_stop_init(void)
>  }
>  early_initcall(cpu_stop_init);
>  
> -int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data,
> -			    const struct cpumask *cpus)
> +static int __stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data,
> +			    const struct cpumask *cpus, bool use_nmi)

The argument alignment was correct before....

>  {
>  	struct multi_stop_data msdata = {
>  		.fn = fn,
>  		.data = data,
>  		.num_threads = num_online_cpus(),
>  		.active_cpus = cpus,
> +		.use_nmi = use_nmi,
>  	};
>  
>  	lockdep_assert_cpus_held();
> @@ -620,6 +666,24 @@ int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data,
>  	return stop_cpus(cpu_online_mask, multi_cpu_stop, &msdata);
>  }

> +int stop_machine_nmi(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus)
> +{
> +	int ret;
> +
> +	cpus_read_lock();
> +	ret = stop_machine_cpuslocked_nmi(fn, data, cpus);
> +	cpus_read_unlock();
> +
> +	return ret;
> +}
> +EXPORT_SYMBOL_GPL(stop_machine_nmi);

Why needs this to be exported? No module has any business with stomp
machine.

Thanks

        tglx

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ