Message-ID: <672db961-6056-426d-bf62-5688344be304@suse.cz>
Date: Wed, 26 Jun 2024 15:27:59 +0200
From: Vlastimil Babka <vbabka@...e.cz>
To: Frederic Weisbecker <frederic@...nel.org>,
 LKML <linux-kernel@...r.kernel.org>
Cc: Peter Zijlstra <peterz@...radead.org>, Ingo Molnar <mingo@...hat.com>,
 Valentin Schneider <vschneid@...hat.com>,
 Marcelo Tosatti <mtosatti@...hat.com>,
 Andrew Morton <akpm@...ux-foundation.org>, Michal Hocko <mhocko@...nel.org>,
 Thomas Gleixner <tglx@...utronix.de>, Oleg Nesterov <oleg@...hat.com>
Subject: Re: [RFC PATCH 5/6] sched/isolation: Introduce isolated task work

On 6/25/24 3:52 PM, Frederic Weisbecker wrote:
> Some asynchronous kernel work may be pending upon resume to userspace
> and get executed later on. For isolated workloads this becomes problematic
> once the process is done with its preparatory work involving syscalls and
> wants to run in userspace without being interrupted.
> 
> Provide an infrastructure to queue work to be executed from the current
> isolated task context right before resuming to userspace. This goes with
> the assumption that isolated tasks are pinned to a single nohz_full CPU.
> 
> Signed-off-by: Frederic Weisbecker <frederic@...nel.org>
> ---
>  include/linux/sched.h           |  1 +
>  include/linux/sched/isolation.h | 17 +++++++++++++++++
>  kernel/sched/core.c             |  1 +
>  kernel/sched/isolation.c        | 31 +++++++++++++++++++++++++++++++
>  kernel/sched/sched.h            |  1 +
>  5 files changed, 51 insertions(+)
> 
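
To make the scenario concrete, an isolated task as assumed here could be set
up from userspace roughly like this (a sketch only; CPU 3 is an arbitrary
choice assumed to be on the nohz_full= boot list):

	#define _GNU_SOURCE
	#include <sched.h>
	#include <stdlib.h>

	int main(void)
	{
		cpu_set_t set;

		CPU_ZERO(&set);
		CPU_SET(3, &set);	/* assumed: CPU 3 is nohz_full */
		if (sched_setaffinity(0, sizeof(set), &set))
			exit(1);

		/* preparatory work involving syscalls happens here */

		for (;;) {
			/* latency-critical loop, no further kernel entries */
		}
	}
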
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index d531b610c410..f6df21866055 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -1349,6 +1349,7 @@ struct task_struct {
>  #endif
>  
>  #ifdef CONFIG_NO_HZ_FULL
> +	struct callback_head		nohz_full_work;
>  	atomic_t			tick_dep_mask;
>  #endif
>  
> diff --git a/include/linux/sched/isolation.h b/include/linux/sched/isolation.h
> index 2b461129d1fa..e69ec5ed1d70 100644
> --- a/include/linux/sched/isolation.h
> +++ b/include/linux/sched/isolation.h
> @@ -72,4 +72,21 @@ static inline bool cpu_is_isolated(int cpu)
>  	       cpuset_cpu_is_isolated(cpu);
>  }
>  
> +#if defined(CONFIG_NO_HZ_FULL)
> +extern int __isolated_task_work_queue(void);
> +
> +static inline int isolated_task_work_queue(void)
> +{
> +	if (!housekeeping_test_cpu(raw_smp_processor_id(), HK_TYPE_TICK))

This is an unconditional call to a function defined in
kernel/sched/isolation.c; only there does the static_branch_unlikely() test
happen, so the call overhead is always paid, and the next patch adds that
overhead to folio_add_lru().

I notice the housekeeping_cpu() function above does the static branch test
inline, which is great. It defaults to returning true, so it is not directly
applicable here, but this function could be structured the same way to keep
the static branch inline.
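
Something along these lines, perhaps (a sketch only; it keeps the check
above as-is and assumes that with housekeeping_overridden off there are no
isolated CPUs, so there is nothing useful to queue):

	static inline int isolated_task_work_queue(void)
	{
		/*
		 * Test the static key inline, as housekeeping_cpu() does:
		 * without any isolation setup the key is off and we bail
		 * out before paying for any out-of-line call.
		 */
		if (!static_branch_unlikely(&housekeeping_overridden))
			return -ENOTSUPP;

		if (!housekeeping_test_cpu(raw_smp_processor_id(), HK_TYPE_TICK))
			return -ENOTSUPP;

		return __isolated_task_work_queue();
	}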

> +		return -ENOTSUPP;
> +
> +	return __isolated_task_work_queue();
> +}
> +
> +extern void isolated_task_work_init(struct task_struct *tsk);
> +#else
> +static inline int isolated_task_work_queue(void) { return -ENOTSUPP; }
> +static inline void isolated_task_work_init(struct task_struct *tsk) { }
> +#endif /* CONFIG_NO_HZ_FULL */
> +
>  #endif /* _LINUX_SCHED_ISOLATION_H */
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index f01979b600e8..01960434dbfd 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -4566,6 +4566,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
>  	p->migration_pending = NULL;
>  #endif
>  	init_sched_mm_cid(p);
> +	isolated_task_work_init(p);
>  }
>  
>  DEFINE_STATIC_KEY_FALSE(sched_numa_balancing);
> diff --git a/kernel/sched/isolation.c b/kernel/sched/isolation.c
> index 5891e715f00d..410df1fedc9d 100644
> --- a/kernel/sched/isolation.c
> +++ b/kernel/sched/isolation.c
> @@ -253,3 +253,34 @@ static int __init housekeeping_isolcpus_setup(char *str)
>  	return housekeeping_setup(str, flags);
>  }
>  __setup("isolcpus=", housekeeping_isolcpus_setup);
> +
> +#if defined(CONFIG_NO_HZ_FULL)
> +static void isolated_task_work(struct callback_head *head)
> +{
> +}
> +
> +int __isolated_task_work_queue(void)
> +{
> +	unsigned long flags;
> +	int ret;
> +
> +	if (current->flags & PF_KTHREAD)
> +		return 0;
> +
> +	local_irq_save(flags);
> +	if (task_work_queued(&current->nohz_full_work)) {
> +		ret = 0;
> +		goto out;
> +	}
> +
> +	ret = task_work_add(current, &current->nohz_full_work, TWA_RESUME);
> +out:
> +	local_irq_restore(flags);
> +	return ret;
> +}
> +
> +void isolated_task_work_init(struct task_struct *tsk)
> +{
> +	init_task_work(&tsk->nohz_full_work, isolated_task_work);
> +}
> +#endif /* CONFIG_NO_HZ_FULL */
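
For completeness, this is roughly how I would expect a caller to use the
queueing API (hypothetical names; drain_now() stands in for whatever work
the caller, e.g. folio_add_lru() in the next patch, would otherwise do
synchronously):

	static void drain_now(void)
	{
		/* ... the actual maintenance work ... */
	}

	static void maybe_defer_drain(void)
	{
		/* Defer to resume-to-userspace when possible; if queueing
		 * is not supported on this CPU, do the work right away. */
		if (isolated_task_work_queue())
			drain_now();
	}
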
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index a831af102070..24653f5879cc 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -60,6 +60,7 @@
>  #include <linux/stop_machine.h>
>  #include <linux/syscalls_api.h>
>  #include <linux/syscalls.h>
> +#include <linux/task_work.h>
>  #include <linux/tick.h>
>  #include <linux/topology.h>
>  #include <linux/types.h>

