Message-ID: <20251204175405.1511340-13-srikar@linux.ibm.com>
Date: Thu,  4 Dec 2025 23:24:00 +0530
From: Srikar Dronamraju <srikar@...ux.ibm.com>
To: linux-kernel@...r.kernel.org, linuxppc-dev@...ts.ozlabs.org,
        Peter Zijlstra <peterz@...radead.org>
Cc: Ben Segall <bsegall@...gle.com>,
        Christophe Leroy <christophe.leroy@...roup.eu>,
        Dietmar Eggemann <dietmar.eggemann@....com>,
        Ingo Molnar <mingo@...nel.org>, Juri Lelli <juri.lelli@...hat.com>,
        K Prateek Nayak <kprateek.nayak@....com>,
        Madhavan Srinivasan <maddy@...ux.ibm.com>,
        Mel Gorman <mgorman@...e.de>, Michael Ellerman <mpe@...erman.id.au>,
        Nicholas Piggin <npiggin@...il.com>,
        Shrikanth Hegde <sshegde@...ux.ibm.com>,
        Srikar Dronamraju <srikar@...ux.ibm.com>,
        Steven Rostedt <rostedt@...dmis.org>,
        Swapnil Sapkal <swapnil.sapkal@....com>,
        Thomas Huth <thuth@...hat.com>,
        Valentin Schneider <vschneid@...hat.com>,
        Vincent Guittot <vincent.guittot@...aro.org>,
        virtualization@...ts.linux.dev, Yicong Yang <yangyicong@...ilicon.com>,
        Ilya Leoshkevich <iii@...ux.ibm.com>
Subject: [PATCH 12/17] pseries/smp: Trigger softoffline based on steal metrics

Based on the steal metrics, update the number of CPUs that need to be
soft onlined/offlined. If the LPAR continues to see steal above the
given higher threshold, then continue to offline more CPUs. This results
in more CPUs of the active cores being used, and the LPAR should see
less vCPU preemption. In the next interval, the steal metrics should
also continue to drop. If the LPAR continues to see steal below the
lower threshold, then continue to online more cores. To avoid ping-pong
behaviour, online/offline a core only if the steal trend is seen for at
least 2 intervals.
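
As an illustrative sketch only (not part of the patch), the two-interval
hysteresis amounts to remembering the direction seen in the previous
interval and acting only when the current interval points the same way.
soft_offline_one_core()/soft_online_one_core() below are placeholder
names for the per-CPU offline work that trigger_softoffline() queues:

	if (steal_ratio >= STEAL_RATIO_HIGH && prev_direction > 0)
		soft_offline_one_core();	/* steal stayed high for 2 intervals */
	else if (steal_ratio <= STEAL_RATIO_LOW && prev_direction < 0)
		soft_online_one_core();		/* steal stayed low for 2 intervals */

	/* remember this interval's direction for the next check */
	if (steal_ratio >= STEAL_RATIO_HIGH)
		prev_direction = 1;
	else if (steal_ratio <= STEAL_RATIO_LOW)
		prev_direction = -1;
	else
		prev_direction = 0;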

A PowerVM environment schedules at a core granularity. Hence it is
preferable to soft online/offline an entire core. Onlining/offlining
only a few CPUs of a core would neither reduce steal nor result in the
resources being used efficiently.

A shared LPAR in a PowerVM environment will have cores interleaved
across multiple NUMA nodes. Hence choosing the last active core to
offline and the first inactive core to online will most likely keep
NUMA balanced. A more intelligent approach to selecting cores to
online/offline may be needed in the future.
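
As a simplified sketch of that selection (mirroring the code added to
smp.c below; "cpus" is the auxiliary mask introduced by this patch):

	/* to offline: take the last core that is still active */
	cpu = cpumask_last(cpu_active_mask);

	/* to online: take the first core that is online but no longer active */
	cpumask_andnot(cpus, cpu_online_mask, cpu_active_mask);
	cpu = cpumask_first(cpus);

Since cores are interleaved across nodes, shrinking from the tail of the
active mask and growing from the head of the inactive set should keep
the change roughly even across NUMA nodes.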

Signed-off-by: Srikar Dronamraju <srikar@...ux.ibm.com>
---
 arch/powerpc/platforms/pseries/lpar.c    |  3 --
 arch/powerpc/platforms/pseries/pseries.h |  3 ++
 arch/powerpc/platforms/pseries/smp.c     | 57 ++++++++++++++++++++++++
 3 files changed, 60 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index f8e049ac9364..f5caf1137707 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -662,9 +662,6 @@ machine_device_initcall(pseries, vcpudispatch_stats_procfs_init);
 #define STEAL_MULTIPLE (STEAL_RATIO * STEAL_RATIO)
 #define PURR_UPDATE_TB tb_ticks_per_sec
 
-static void trigger_softoffline(unsigned long steal_ratio)
-{
-}
 
 static bool should_cpu_process_steal(int cpu)
 {
diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h
index 68cf25152870..2527c2049e74 100644
--- a/arch/powerpc/platforms/pseries/pseries.h
+++ b/arch/powerpc/platforms/pseries/pseries.h
@@ -119,6 +119,9 @@ int dlpar_workqueue_init(void);
 
 extern u32 pseries_security_flavor;
 void pseries_setup_security_mitigations(void);
+#ifdef CONFIG_PPC_SPLPAR
+void trigger_softoffline(unsigned long steal_ratio);
+#endif
 
 #ifdef CONFIG_PPC_64S_HASH_MMU
 void pseries_lpar_read_hblkrm_characteristics(void);
diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c
index ec1af13670f2..4c83749018d0 100644
--- a/arch/powerpc/platforms/pseries/smp.c
+++ b/arch/powerpc/platforms/pseries/smp.c
@@ -51,6 +51,9 @@
  * interface by prom_hold_cpus and is spinning on secondary_hold_spinloop.
  */
 static cpumask_var_t of_spin_mask;
+#ifdef CONFIG_PPC_SPLPAR
+static cpumask_var_t cpus;
+#endif
 
 /* Query where a cpu is now.  Return codes #defined in plpar_wrappers.h */
 int smp_query_cpu_stopped(unsigned int pcpu)
@@ -277,6 +280,14 @@ static __init void pSeries_smp_probe(void)
 }
 
 #ifdef CONFIG_PPC_SPLPAR
+/*
+ * Steal above the high threshold causes CPUs to be soft offlined; steal
+ * below the low threshold allows work to spread back out to more
+ * cores.
+ */
+#define STEAL_RATIO_HIGH (10 * STEAL_RATIO)
+#define STEAL_RATIO_LOW (5 * STEAL_RATIO)
+
 static unsigned int max_virtual_cores __read_mostly;
 static unsigned int entitled_cores __read_mostly;
 static unsigned int available_cores;
@@ -311,6 +322,49 @@ static unsigned int pseries_num_available_cores(void)
 
 	return available_cores;
 }
+
+void trigger_softoffline(unsigned long steal_ratio)
+{
+	int currcpu = smp_processor_id();
+	static int prev_direction;
+	int cpu, i;
+
+	if (steal_ratio >= STEAL_RATIO_HIGH && prev_direction > 0) {
+		/*
+		 * System entitlement was reduced earlier but we continue to
+		 * see steal time. Reduce entitlement further.
+		 */
+		cpu = cpumask_last(cpu_active_mask);
+		for_each_cpu_andnot(i, cpu_sibling_mask(cpu), cpu_sibling_mask(currcpu)) {
+			struct offline_worker *worker = &per_cpu(offline_workers, i);
+
+			worker->offline = 1;
+			schedule_work_on(i, &worker->work);
+		}
+	} else if (steal_ratio <= STEAL_RATIO_LOW && prev_direction < 0) {
+		/*
+		 * System entitlement was increased but we continue to see
+		 * less steal time. Increase entitlement further.
+		 */
+		cpumask_andnot(cpus, cpu_online_mask, cpu_active_mask);
+		if (cpumask_empty(cpus))
+			return;
+
+		cpu = cpumask_first(cpus);
+		for_each_cpu_andnot(i, cpu_sibling_mask(cpu), cpu_sibling_mask(currcpu)) {
+			struct offline_worker *worker = &per_cpu(offline_workers, i);
+
+			worker->offline = 0;
+			schedule_work_on(i, &worker->work);
+		}
+	}
+	if (steal_ratio >= STEAL_RATIO_HIGH)
+		prev_direction = 1;
+	else if (steal_ratio <= STEAL_RATIO_LOW)
+		prev_direction = -1;
+	else
+		prev_direction = 0;
+}
 #endif
 
 static struct smp_ops_t pseries_smp_ops = {
@@ -336,6 +390,9 @@ void __init smp_init_pseries(void)
 	smp_ops = &pseries_smp_ops;
 
 	alloc_bootmem_cpumask_var(&of_spin_mask);
+#ifdef CONFIG_PPC_SPLPAR
+	alloc_bootmem_cpumask_var(&cpus);
+#endif
 
 	/*
 	 * Mark threads which are still spinning in hold loops
-- 
2.43.7

