Message-ID: <20231124223226.24249-7-frederic@kernel.org>
Date:   Fri, 24 Nov 2023 23:32:25 +0100
From:   Frederic Weisbecker <frederic@...nel.org>
To:     LKML <linux-kernel@...r.kernel.org>
Cc:     Peter Zijlstra <peterz@...radead.org>,
        "Rafael J . Wysocki" <rafael@...nel.org>,
        Daniel Lezcano <daniel.lezcano@...aro.org>,
        linux-pm@...r.kernel.org, Thomas Gleixner <tglx@...utronix.de>,
        Ingo Molnar <mingo@...hat.com>, Borislav Petkov <bp@...en8.de>,
        Dave Hansen <dave.hansen@...ux.intel.com>,
        Frederic Weisbecker <frederic@...nel.org>
Subject: [PATCH 6/7] cpuidle: Handle TIF_POLLING_NRFLAG on behalf of CPUIDLE_FLAG_POLLING_HARD states

From: Peter Zijlstra <peterz@...radead.org>

The current handling of TIF_POLLING_NRFLAG is a bit of a maze (see the
sketch below):

1) A brief common part of the generic idle loop sets TIF_POLLING_NRFLAG
  while cpuidle selects an appropriate state and the tick is evaluated
  and then stopped. Summary: one pair of set/clear.

2) The cpuidle state is then entered with TIF_POLLING_NRFLAG cleared, but
  if the state polls on need_resched() (in software or hardware), it sets
  TIF_POLLING_NRFLAG again and clears it once it completes. Summary:
  another pair of set/clear.

3) goto 1)
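
For illustration, the flag traffic of one idle loop iteration condenses
to something like the sketch below (loosely based on kernel/sched/idle.c
and the polling state callbacks; not the literal call chain):

	__current_set_polling();		/* 1) set */
	/* cpuidle selects a state; tick evaluated and stopped */
	if (current_clr_polling_and_test())	/* 1) clear + test */
		return -EBUSY;

	/* inside a state that polls on need_resched(): */
	if (!current_set_polling_and_test()) {	/* 2) set + test */
		/* poll until wakeup or need_resched() */
	}
	current_clr_polling();			/* 2) clear */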

However, those costly atomic operations, some of them fully ordered RmW,
could be avoided if the cpuidle core knew in advance whether the target
state polls on need_resched(). If it does, TIF_POLLING_NRFLAG could simply
be set once before entering the idle loop and cleared once after the idle
loop exits.
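
A minimal sketch of the intended shape (illustrative only):

	__current_set_polling();	/* once, before the idle loop */
	while (!need_resched()) {
		/* select and enter polling states freely,
		 * with no further TIF_POLLING_NRFLAG traffic */
	}
	__current_clr_polling();	/* once, after idle loop exit */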

Start dealing with this by handling TIF_POLLING_NRFLAG on behalf of
CPUIDLE_FLAG_POLLING_HARD states.

[fweisbec: _ Handle broadcast properly
           _ Ignore mwait_idle() as it can be used by default_idle_call()]
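
For reference, a state that polls in hardware would then opt in through
its flags. A hypothetical driver table entry (names and values made up
for illustration; CPUIDLE_FLAG_POLLING_HARD comes from an earlier patch
in this series):

	static struct cpuidle_state poll_hw_state = {
		.name			= "POLL-HW",
		.desc			= "polls need_resched() in hardware",
		.flags			= CPUIDLE_FLAG_POLLING_HARD,
		.exit_latency		= 0,
		.target_residency	= 0,
		.enter			= poll_hw_enter, /* hypothetical mwait-based callback */
	};

With this flag, cpuidle_enter_state() keeps TIF_POLLING_NRFLAG set across
the state's enter callback, so the state no longer needs its own
set/clear pair.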

Not-yet-signed-off-by: Peter Zijlstra <peterz@...radead.org>
Signed-off-by: Frederic Weisbecker <frederic@...nel.org>
---
 arch/x86/include/asm/mwait.h |  3 +--
 drivers/cpuidle/cpuidle.c    | 21 ++++++++++++++++++++-
 include/linux/sched/idle.h   |  7 ++++++-
 kernel/sched/idle.c          | 40 +++++++++++++-----------------------
 4 files changed, 41 insertions(+), 30 deletions(-)

diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
index 920426d691ce..3634d00e5c37 100644
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -116,7 +116,7 @@ static __always_inline void __sti_mwait(unsigned long eax, unsigned long ecx)
  */
 static __always_inline void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
 {
-	if (static_cpu_has_bug(X86_BUG_MONITOR) || !current_set_polling_and_test()) {
+	if (static_cpu_has_bug(X86_BUG_MONITOR) || !need_resched()) {
 		if (static_cpu_has_bug(X86_BUG_CLFLUSH_MONITOR)) {
 			mb();
 			clflush((void *)&current_thread_info()->flags);
@@ -134,7 +134,6 @@ static __always_inline void mwait_idle_with_hints(unsigned long eax, unsigned lo
 			}
 		}
 	}
-	current_clr_polling();
 }
 
 /*
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index 737a026ef58a..49078cc83f4a 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -213,10 +213,10 @@ noinstr int cpuidle_enter_state(struct cpuidle_device *dev,
 				 int index)
 {
 	int entered_state;
-
 	struct cpuidle_state *target_state = &drv->states[index];
 	bool broadcast = !!(target_state->flags & CPUIDLE_FLAG_TIMER_STOP);
 	ktime_t time_start, time_end;
+	bool polling;
 
 	instrumentation_begin();
 
@@ -236,6 +236,22 @@ noinstr int cpuidle_enter_state(struct cpuidle_device *dev,
 		broadcast = false;
 	}
 
+	polling = target_state->flags & CPUIDLE_FLAG_POLLING_HARD;
+
+	/*
+	 * If the target state doesn't poll on need_resched(), this is
+	 * the last check: any remote setting of TIF_NEED_RESCHED past
+	 * this point will involve an IPI.
+	 */
+	if (!polling && current_clr_polling_and_test()) {
+		if (broadcast)
+			tick_broadcast_exit();
+		dev->last_residency_ns = 0;
+		local_irq_enable();
+		instrumentation_end();
+		return -EBUSY;
+	}
+
 	if (target_state->flags & CPUIDLE_FLAG_TLB_FLUSHED)
 		leave_mm(dev->cpu);
 
@@ -335,6 +351,9 @@ noinstr int cpuidle_enter_state(struct cpuidle_device *dev,
 		dev->states_usage[index].rejected++;
 	}
 
+	if (!polling)
+		__current_set_polling();
+
 	instrumentation_end();
 
 	return entered_state;
diff --git a/include/linux/sched/idle.h b/include/linux/sched/idle.h
index 478084f9105e..50c13531f5d8 100644
--- a/include/linux/sched/idle.h
+++ b/include/linux/sched/idle.h
@@ -68,6 +68,8 @@ static __always_inline bool __must_check current_set_polling_and_test(void)
 
 static __always_inline bool __must_check current_clr_polling_and_test(void)
 {
+	bool ret;
+
 	__current_clr_polling();
 
 	/*
@@ -76,7 +78,10 @@ static __always_inline bool __must_check current_clr_polling_and_test(void)
 	 */
 	smp_mb__after_atomic();
 
-	return unlikely(tif_need_resched());
+	ret = unlikely(tif_need_resched());
+	if (ret)
+		__current_set_polling();
+	return ret;
 }
 
 #else
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 565f8374ddbb..4e554b4e3781 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -94,11 +94,12 @@ void __cpuidle default_idle_call(void)
 		stop_critical_timings();
 
 		ct_cpuidle_enter();
-		arch_cpu_idle();
+		arch_cpu_idle(); // XXX assumes !polling
 		ct_cpuidle_exit();
 
 		start_critical_timings();
 		trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id());
+		__current_set_polling();
 	}
 	local_irq_enable();
 	instrumentation_end();
@@ -107,31 +108,14 @@ void __cpuidle default_idle_call(void)
 static int call_cpuidle_s2idle(struct cpuidle_driver *drv,
 			       struct cpuidle_device *dev)
 {
+	int ret;
+
 	if (current_clr_polling_and_test())
 		return -EBUSY;
 
-	return cpuidle_enter_s2idle(drv, dev);
-}
-
-static int call_cpuidle(struct cpuidle_driver *drv, struct cpuidle_device *dev,
-		      int next_state)
-{
-	/*
-	 * The idle task must be scheduled, it is pointless to go to idle, just
-	 * update no idle residency and return.
-	 */
-	if (current_clr_polling_and_test()) {
-		dev->last_residency_ns = 0;
-		local_irq_enable();
-		return -EBUSY;
-	}
-
-	/*
-	 * Enter the idle state previously returned by the governor decision.
-	 * This function will block until an interrupt occurs and will take
-	 * care of re-enabling the local interrupts
-	 */
-	return cpuidle_enter(drv, dev, next_state);
+	ret = cpuidle_enter_s2idle(drv, dev);
+	__current_set_polling();
+	return ret;
 }
 
 /**
@@ -198,7 +182,7 @@ static void cpuidle_idle_call(void)
 		tick_nohz_idle_stop_tick();
 
 		next_state = cpuidle_find_deepest_state(drv, dev, max_latency_ns);
-		call_cpuidle(drv, dev, next_state);
+		cpuidle_enter(drv, dev, next_state);
 	} else {
 		bool stop_tick = true;
 
@@ -212,7 +196,12 @@ static void cpuidle_idle_call(void)
 		else
 			tick_nohz_idle_retain_tick();
 
-		entered_state = call_cpuidle(drv, dev, next_state);
+		/*
+		 * Enter the idle state previously returned by the governor decision.
+		 * This function will block until an interrupt occurs and will take
+		 * care of re-enabling the local interrupts.
+		 */
+		entered_state = cpuidle_enter(drv, dev, next_state);
 		/*
 		 * Give the governor an opportunity to reflect on the outcome
 		 */
@@ -220,7 +209,6 @@ static void cpuidle_idle_call(void)
 	}
 
 exit_idle:
-	__current_set_polling();
 
 	/*
 	 * It is up to the idle functions to reenable local interrupts
-- 
2.42.1
