lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <aGfgUq9Tfn8xlcar@tiehlicka>
Date: Fri, 4 Jul 2025 16:08:18 +0200
From: Michal Hocko <mhocko@...e.com>
To: Chen Yu <yu.c.chen@...el.com>
Cc: Ingo Molnar <mingo@...hat.com>,
	Andrew Morton <akpm@...ux-foundation.org>,
	Peter Zijlstra <peterz@...radead.org>,
	Jirka Hladky <jhladky@...hat.com>,
	Srikanth Aithal <Srikanth.Aithal@....com>,
	Suneeth D <Suneeth.D@....com>, Libo Chen <libo.chen@...cle.com>,
	linux-kernel@...r.kernel.org
Subject: Re: [PATCH] Revert "sched/numa: add statistics of numa balance task"

On Fri 04-07-25 21:56:20, Chen Yu wrote:
> This reverts commit ad6b26b6a0a79166b53209df2ca1cf8636296382.
> 
> This commit introduces per-memcg/task NUMA balance statistics,
> but unfortunately it introduced a NULL pointer exception due
> to the following race condition: After a swap task candidate
> was chosen, its mm_struct pointer was set to NULL due to task
> exit. Later, when performing the actual task swapping, the
> p->mm caused the problem.
> 
> CPU0                                   CPU1
> :
> ...
> task_numa_migrate
>      task_numa_find_cpu
>       task_numa_compare
>         # a normal task p is chosen
>         env->best_task = p
> 
>                                           # p exit:
>                                           exit_signals(p);
>                                              p->flags |= PF_EXITING
>                                           exit_mm
>                                              p->mm = NULL;
> 
>       migrate_swap_stop
>         __migrate_swap_task((arg->src_task, arg->dst_cpu)
>          count_memcg_event_mm(p->mm, NUMA_TASK_SWAP)# p->mm is NULL
> 
> task_lock() should be held and the PF_EXITING flag needs to
> be checked to prevent this from happening. After discussion,
> the conclusion was that adding a lock is not worthwhile for
> some statistics calculations. Revert the change and rely on
> the tracepoint for this purpose.
> 
> Fixes: ad6b26b6a0a7 ("sched/numa: add statistics of numa balance task")
> Reported-by: Jirka Hladky <jhladky@...hat.com>
> Closes: https://lore.kernel.org/all/CAE4VaGBLJxpd=NeRJXpSCuw=REhC5LWJpC29kDy-Zh2ZDyzQZA@mail.gmail.com/
> Reported-by: Srikanth Aithal <Srikanth.Aithal@....com>
> Reported-by: Suneeth D <Suneeth.D@....com>
> Signed-off-by: Chen Yu <yu.c.chen@...el.com>

Acked-by: Michal Hocko <mhocko@...e.com>
Thanks!

> ---
>  Documentation/admin-guide/cgroup-v2.rst | 6 ------
>  include/linux/sched.h                   | 4 ----
>  include/linux/vm_event_item.h           | 2 --
>  kernel/sched/core.c                     | 9 ++-------
>  kernel/sched/debug.c                    | 4 ----
>  mm/memcontrol.c                         | 2 --
>  mm/vmstat.c                             | 2 --
>  7 files changed, 2 insertions(+), 27 deletions(-)
> 
> diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
> index 0cc35a14afbe..bd98ea3175ec 100644
> --- a/Documentation/admin-guide/cgroup-v2.rst
> +++ b/Documentation/admin-guide/cgroup-v2.rst
> @@ -1732,12 +1732,6 @@ The following nested keys are defined.
>  	  numa_hint_faults (npn)
>  		Number of NUMA hinting faults.
>  
> -	  numa_task_migrated (npn)
> -		Number of task migration by NUMA balancing.
> -
> -	  numa_task_swapped (npn)
> -		Number of task swap by NUMA balancing.
> -
>  	  pgdemote_kswapd
>  		Number of pages demoted by kswapd.
>  
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 4f78a64beb52..aa9c5be7a632 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -548,10 +548,6 @@ struct sched_statistics {
>  	u64				nr_failed_migrations_running;
>  	u64				nr_failed_migrations_hot;
>  	u64				nr_forced_migrations;
> -#ifdef CONFIG_NUMA_BALANCING
> -	u64				numa_task_migrated;
> -	u64				numa_task_swapped;
> -#endif
>  
>  	u64				nr_wakeups;
>  	u64				nr_wakeups_sync;
> diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
> index 91a3ce9a2687..9e15a088ba38 100644
> --- a/include/linux/vm_event_item.h
> +++ b/include/linux/vm_event_item.h
> @@ -66,8 +66,6 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
>  		NUMA_HINT_FAULTS,
>  		NUMA_HINT_FAULTS_LOCAL,
>  		NUMA_PAGE_MIGRATE,
> -		NUMA_TASK_MIGRATE,
> -		NUMA_TASK_SWAP,
>  #endif
>  #ifdef CONFIG_MIGRATION
>  		PGMIGRATE_SUCCESS, PGMIGRATE_FAIL,
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 8988d38d46a3..ca0be74e865b 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -3362,10 +3362,6 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
>  #ifdef CONFIG_NUMA_BALANCING
>  static void __migrate_swap_task(struct task_struct *p, int cpu)
>  {
> -	__schedstat_inc(p->stats.numa_task_swapped);
> -	count_vm_numa_event(NUMA_TASK_SWAP);
> -	count_memcg_event_mm(p->mm, NUMA_TASK_SWAP);
> -
>  	if (task_on_rq_queued(p)) {
>  		struct rq *src_rq, *dst_rq;
>  		struct rq_flags srf, drf;
> @@ -7934,9 +7930,8 @@ int migrate_task_to(struct task_struct *p, int target_cpu)
>  	if (!cpumask_test_cpu(target_cpu, p->cpus_ptr))
>  		return -EINVAL;
>  
> -	__schedstat_inc(p->stats.numa_task_migrated);
> -	count_vm_numa_event(NUMA_TASK_MIGRATE);
> -	count_memcg_event_mm(p->mm, NUMA_TASK_MIGRATE);
> +	/* TODO: This is not properly updating schedstats */
> +
>  	trace_sched_move_numa(p, curr_cpu, target_cpu);
>  	return stop_one_cpu(curr_cpu, migration_cpu_stop, &arg);
>  }
> diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
> index 9d71baf08075..557246880a7e 100644
> --- a/kernel/sched/debug.c
> +++ b/kernel/sched/debug.c
> @@ -1210,10 +1210,6 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
>  		P_SCHEDSTAT(nr_failed_migrations_running);
>  		P_SCHEDSTAT(nr_failed_migrations_hot);
>  		P_SCHEDSTAT(nr_forced_migrations);
> -#ifdef CONFIG_NUMA_BALANCING
> -		P_SCHEDSTAT(numa_task_migrated);
> -		P_SCHEDSTAT(numa_task_swapped);
> -#endif
>  		P_SCHEDSTAT(nr_wakeups);
>  		P_SCHEDSTAT(nr_wakeups_sync);
>  		P_SCHEDSTAT(nr_wakeups_migrate);
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 902da8a9c643..70fdeda1120b 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -474,8 +474,6 @@ static const unsigned int memcg_vm_event_stat[] = {
>  	NUMA_PAGE_MIGRATE,
>  	NUMA_PTE_UPDATES,
>  	NUMA_HINT_FAULTS,
> -	NUMA_TASK_MIGRATE,
> -	NUMA_TASK_SWAP,
>  #endif
>  };
>  
> diff --git a/mm/vmstat.c b/mm/vmstat.c
> index 429ae5339bfe..a78d70ddeacd 100644
> --- a/mm/vmstat.c
> +++ b/mm/vmstat.c
> @@ -1346,8 +1346,6 @@ const char * const vmstat_text[] = {
>  	"numa_hint_faults",
>  	"numa_hint_faults_local",
>  	"numa_pages_migrated",
> -	"numa_task_migrated",
> -	"numa_task_swapped",
>  #endif
>  #ifdef CONFIG_MIGRATION
>  	"pgmigrate_success",
> -- 
> 2.25.1
> 

-- 
Michal Hocko
SUSE Labs

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ