Message-ID: <20241210140345.GS35539@noisy.programming.kicks-ass.net>
Date: Tue, 10 Dec 2024 15:03:45 +0100
From: Peter Zijlstra <peterz@...radead.org>
To: Frederic Weisbecker <frederic@...nel.org>
Cc: LKML <linux-kernel@...r.kernel.org>,
	"Rafael J . Wysocki" <rafael@...nel.org>,
	Daniel Lezcano <daniel.lezcano@...aro.org>,
	linux-pm@...r.kernel.org, Thomas Gleixner <tglx@...utronix.de>,
	Ingo Molnar <mingo@...hat.com>, Borislav Petkov <bp@...en8.de>,
	Dave Hansen <dave.hansen@...ux.intel.com>
Subject: Re: [PATCH 3/5] cpuidle: Handle TIF_NR_POLLING on behalf of
 CPUIDLE_FLAG_MWAIT states

On Fri, Dec 06, 2024 at 02:04:06PM +0100, Frederic Weisbecker wrote:
> From: Peter Zijlstra <peterz@...radead.org>
> 
> The current handling of TIF_NR_POLLING is a bit of a maze:
> 
> 1) TIF_NR_POLLING is set on idle entry (one atomic set)
> 
> 2) Once cpuidle has selected an appropriate state and the tick is
>    evaluated and then possibly stopped, TIF_NR_POLLING is cleared
>    (one RmW operation)
> 
> 3) The cpuidle state is then called with TIF_NR_POLLING cleared, but if
>    the state polls on (or monitors) need_resched() it sets
>    TIF_NR_POLLING again before sleeping and clears it on wake-up.
>    Summary: another pair of set/clear
> 
> 4) Set TIF_NR_POLLING back (one atomic set)
> 
> 5) goto 2) if need_resched() is not set
> 
> All those costly atomic operations, some of them fully ordered RmWs,
> could be avoided if the cpuidle core knew in advance whether the target
> state polls on (or monitors) need_resched(). If so, TIF_NR_POLLING
> could simply be set once upon entering the idle loop and cleared once
> after exiting it.
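
To make the before/after concrete, here is a condensed sketch of the two
flows (illustrative pseudo-C, not the kernel code; select_state() and
enter_state() are placeholder names):

	/* Current flow, following the changelog's numbering: */
	__current_set_polling();			/* 1) atomic set on idle entry */
	while (!need_resched()) {
		state = select_state();
		if (current_clr_polling_and_test())	/* 2) ordered RmW */
			break;
		enter_state(state);			/* 3) may set/clear again inside */
		__current_set_polling();		/* 4) atomic set, then 5) loop */
	}

	/* Proposed, for states that monitor need_resched() themselves: */
	__current_set_polling();			/* one atomic set on entry */
	while (!need_resched())
		enter_state(select_state());		/* no flag traffic */
	__current_clr_polling();			/* one clear on exit */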
> 
> Start dealing with that by handling TIF_NR_POLLING on behalf of
> mwait-based states.
> 
> [fweisbec: _ Handle broadcast properly
>            _ Ignore mwait_idle() as it can be used by default_idle_call()]
> 
> Not-yet-signed-off-by: Peter Zijlstra <peterz@...radead.org>

Signed-off-by: Peter Zijlstra (Intel) <peterz@...radead.org>

> Signed-off-by: Frederic Weisbecker <frederic@...nel.org>
> ---
>  arch/x86/include/asm/mwait.h |  3 +--
>  drivers/cpuidle/cpuidle.c    | 22 +++++++++++++++++++-
>  include/linux/sched/idle.h   |  7 ++++++-
>  kernel/sched/idle.c          | 40 +++++++++++++-----------------------
>  4 files changed, 42 insertions(+), 30 deletions(-)
> 
> diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
> index 920426d691ce..3634d00e5c37 100644
> --- a/arch/x86/include/asm/mwait.h
> +++ b/arch/x86/include/asm/mwait.h
> @@ -116,7 +116,7 @@ static __always_inline void __sti_mwait(unsigned long eax, unsigned long ecx)
>   */
>  static __always_inline void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
>  {
> -	if (static_cpu_has_bug(X86_BUG_MONITOR) || !current_set_polling_and_test()) {
> +	if (static_cpu_has_bug(X86_BUG_MONITOR) || !need_resched()) {
>  		if (static_cpu_has_bug(X86_BUG_CLFLUSH_MONITOR)) {
>  			mb();
>  			clflush((void *)&current_thread_info()->flags);
> @@ -134,7 +134,6 @@ static __always_inline void mwait_idle_with_hints(unsigned long eax, unsigned lo
>  			}
>  		}
>  	}
> -	current_clr_polling();
>  }
>  
>  /*
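
The assumption behind this hunk (my reading; the changelog implies it but
this hunk alone doesn't show it) is that the cpuidle core now keeps
TIF_NR_POLLING set across the whole MWAIT section, so the monitor armed
on current_thread_info()->flags still observes remote TIF_NEED_RESCHED
writes and the cheap need_resched() read suffices:

	/* Sketch of the resulting calling convention: */
	__current_set_polling();		/* once, by the cpuidle core */
	mwait_idle_with_hints(eax, ecx);	/* now only reads need_resched() */
	/* TIF_NR_POLLING deliberately stays set on return */
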
> diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
> index 0835da449db8..46c0a2726f67 100644
> --- a/drivers/cpuidle/cpuidle.c
> +++ b/drivers/cpuidle/cpuidle.c
> @@ -217,10 +217,10 @@ noinstr int cpuidle_enter_state(struct cpuidle_device *dev,
>  				 int index)
>  {
>  	int entered_state;
> -
>  	struct cpuidle_state *target_state = &drv->states[index];
>  	bool broadcast = !!(target_state->flags & CPUIDLE_FLAG_TIMER_STOP);
>  	ktime_t time_start, time_end;
> +	bool polling;
>  
>  	instrumentation_begin();
>  
> @@ -237,6 +237,23 @@ noinstr int cpuidle_enter_state(struct cpuidle_device *dev,
>  		broadcast = false;
>  	}
>  
> +	polling = target_state->flags & CPUIDLE_FLAG_MWAIT;
> +
> +	/*
> +	 * If the target state doesn't poll on need_resched(), this is
> +	 * the last check after which further TIF_NEED_RESCHED remote setting
> +	 * will involve an IPI.
> +	 */
> +	if (!polling && current_clr_polling_and_test()) {
> +		if (broadcast)
> +			tick_broadcast_exit();
> +		dev->last_residency_ns = 0;
> +		local_irq_enable();
> +		instrumentation_end();
> +		return -EBUSY;
> +	}
> +
> +
>  	if (target_state->flags & CPUIDLE_FLAG_TLB_FLUSHED)
>  		leave_mm();
>  
> @@ -336,6 +353,9 @@ noinstr int cpuidle_enter_state(struct cpuidle_device *dev,
>  		dev->states_usage[index].rejected++;
>  	}
>  
> +	if (!polling)
> +		__current_set_polling();
> +
>  	instrumentation_end();
>  
>  	return entered_state;
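
Condensing the two hunks above, the polling handling in
cpuidle_enter_state() now boils down to (error paths and instrumentation
elided):

	polling = target_state->flags & CPUIDLE_FLAG_MWAIT;

	/*
	 * Non-polling state: this is the last cheap need_resched() check;
	 * any later remote TIF_NEED_RESCHED setting needs an IPI.
	 */
	if (!polling && current_clr_polling_and_test())
		return -EBUSY;		/* after undoing tick_broadcast_enter() */

	/* ... enter the low-power state ... */

	if (!polling)
		__current_set_polling();	/* restore the idle-loop invariant */
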
> diff --git a/include/linux/sched/idle.h b/include/linux/sched/idle.h
> index e670ac282333..3e3482bfb028 100644
> --- a/include/linux/sched/idle.h
> +++ b/include/linux/sched/idle.h
> @@ -68,6 +68,8 @@ static __always_inline bool __must_check current_set_polling_and_test(void)
>  
>  static __always_inline bool __must_check current_clr_polling_and_test(void)
>  {
> +	bool ret;
> +
>  	__current_clr_polling();
>  
>  	/*
> @@ -76,7 +78,10 @@ static __always_inline bool __must_check current_clr_polling_and_test(void)
>  	 */
>  	smp_mb__after_atomic();
>  
> -	return unlikely(tif_need_resched());
> +	ret = unlikely(tif_need_resched());
> +	if (ret)
> +		__current_set_polling();
> +	return ret;
>  }
>  
>  #else
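
Note the changed contract: current_clr_polling_and_test() now re-sets
TIF_NR_POLLING itself when it observes TIF_NEED_RESCHED, so callers can
simply bail out without a rollback of their own, roughly:

	if (current_clr_polling_and_test())
		return -EBUSY;	/* polling flag already set again */
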
> diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
> index 621696269584..9eece3df1080 100644
> --- a/kernel/sched/idle.c
> +++ b/kernel/sched/idle.c
> @@ -114,12 +114,13 @@ void __cpuidle default_idle_call(void)
>  		stop_critical_timings();
>  
>  		ct_cpuidle_enter();
> -		arch_cpu_idle();
> +		arch_cpu_idle(); // XXX assumes !polling
>  		ct_cpuidle_exit();
>  
>  		start_critical_timings();
>  		trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id());
>  		cond_tick_broadcast_exit();
> +		__current_set_polling();
>  	}
>  	local_irq_enable();
>  	instrumentation_end();
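
The unconditional __current_set_polling() here leans on the (explicitly
XXX-flagged) assumption that arch_cpu_idle() itself does not poll; per
the bracketed note in the changelog, mwait_idle() is ignored by this
series precisely because default_idle_call() may end up there. Condensed:

	ct_cpuidle_enter();
	arch_cpu_idle();		/* assumed non-polling (see the XXX) */
	ct_cpuidle_exit();
	/* ... */
	__current_set_polling();	/* re-arm polling before leaving */
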
> @@ -128,31 +129,14 @@ void __cpuidle default_idle_call(void)
>  static int call_cpuidle_s2idle(struct cpuidle_driver *drv,
>  			       struct cpuidle_device *dev)
>  {
> +	int ret;
> +
>  	if (current_clr_polling_and_test())
>  		return -EBUSY;
>  
> -	return cpuidle_enter_s2idle(drv, dev);
> -}
> -
> -static int call_cpuidle(struct cpuidle_driver *drv, struct cpuidle_device *dev,
> -		      int next_state)
> -{
> -	/*
> -	 * The idle task must be scheduled, it is pointless to go to idle, just
> -	 * update no idle residency and return.
> -	 */
> -	if (current_clr_polling_and_test()) {
> -		dev->last_residency_ns = 0;
> -		local_irq_enable();
> -		return -EBUSY;
> -	}
> -
> -	/*
> -	 * Enter the idle state previously returned by the governor decision.
> -	 * This function will block until an interrupt occurs and will take
> -	 * care of re-enabling the local interrupts
> -	 */
> -	return cpuidle_enter(drv, dev, next_state);
> +	ret = cpuidle_enter_s2idle(drv, dev);
> +	__current_set_polling();
> +	return ret;
>  }
>  
>  /**
> @@ -213,7 +197,7 @@ static void cpuidle_idle_call(void)
>  		tick_nohz_idle_stop_tick();
>  
>  		next_state = cpuidle_find_deepest_state(drv, dev, max_latency_ns);
> -		call_cpuidle(drv, dev, next_state);
> +		cpuidle_enter(drv, dev, next_state);
>  	} else {
>  		bool stop_tick = true;
>  
> @@ -227,7 +211,12 @@ static void cpuidle_idle_call(void)
>  		else
>  			tick_nohz_idle_retain_tick();
>  
> -		entered_state = call_cpuidle(drv, dev, next_state);
> +		/*
> +		 * Enter the idle state previously returned by the governor decision.
> +		 * This function will block until an interrupt occurs and will take
> +		 * care of re-enabling the local interrupts.
> +		 */
> +		entered_state = cpuidle_enter(drv, dev, next_state);
>  		/*
>  		 * Give the governor an opportunity to reflect on the outcome
>  		 */
> @@ -235,7 +224,6 @@ static void cpuidle_idle_call(void)
>  	}
>  
>  exit_idle:
> -	__current_set_polling();
>  
>  	/*
>  	 * It is up to the idle functions to re-enable local interrupts
> -- 
> 2.46.0
> 
