lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <760e230c-5699-485c-910b-ebeaa9f9bd09@linux.ibm.com>
Date: Thu, 26 Jun 2025 19:09:13 +0530
From: Shrikanth Hegde <sshegde@...ux.ibm.com>
To: Yury Norov <yury.norov@...il.com>
Cc: mingo@...hat.com, peterz@...radead.org, juri.lelli@...hat.com,
        vincent.guittot@...aro.org, tglx@...utronix.de, maddy@...ux.ibm.com,
        vschneid@...hat.com, dietmar.eggemann@....com, rostedt@...dmis.org,
        kprateek.nayak@....com, huschle@...ux.ibm.com, srikar@...ux.ibm.com,
        linux-kernel@...r.kernel.org, christophe.leroy@...roup.eu,
        linuxppc-dev@...ts.ozlabs.org, gregkh@...uxfoundation.org
Subject: Re: [RFC v2 9/9] [DEBUG] powerpc: add debug file for set/unset cpu
 avoid


Hi Yury, thanks for taking a look at this.

> On Thu, Jun 26, 2025 at 12:41:08AM +0530, Shrikanth Hegde wrote:
>> Reference patch for how an architecture can make use of this infra.
>>
>> This is not meant to be merged. Instead the vp_manual_hint should either
>> come from hardware or could be derived using steal time.
> 
> If you don't add any code that manages the 'avoid' mask on the host
> side, all this becomes a dead code.

Ok.

Maybe I can keep this debug file until we get the infra where
the hint is either provided by hardware by means of an hcall or
calculated based on steal time.

I think I will have to polish this a bit and move it to an appropriate place
if this is to be kept.

>   
>> When the provided hint is less than the total CPUs in the system, it
>> will enable the cpu avoid static key and set those CPUs as avoid.
>>
>> Signed-off-by: Shrikanth Hegde <sshegde@...ux.ibm.com>
>> ---
>>   arch/powerpc/include/asm/paravirt.h |  2 ++
>>   arch/powerpc/kernel/smp.c           | 50 +++++++++++++++++++++++++++++
>>   2 files changed, 52 insertions(+)
>>
>> diff --git a/arch/powerpc/include/asm/paravirt.h b/arch/powerpc/include/asm/paravirt.h
>> index b78b82d66057..b6497e0b60d8 100644
>> --- a/arch/powerpc/include/asm/paravirt.h
>> +++ b/arch/powerpc/include/asm/paravirt.h
>> @@ -10,6 +10,8 @@
>>   #include <asm/hvcall.h>
>>   #endif
>>   
>> +DECLARE_STATIC_KEY_FALSE(paravirt_cpu_avoid_enabled);
>> +
>>   #ifdef CONFIG_PPC_SPLPAR
>>   #include <linux/smp.h>
>>   #include <asm/kvm_guest.h>
>> diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
>> index 5ac7084eebc0..e00cdc4de441 100644
>> --- a/arch/powerpc/kernel/smp.c
>> +++ b/arch/powerpc/kernel/smp.c
>> @@ -64,6 +64,7 @@
>>   #include <asm/systemcfg.h>
>>   
>>   #include <trace/events/ipi.h>
>> +#include <linux/debugfs.h>
>>   
>>   #ifdef DEBUG
>>   #include <asm/udbg.h>
>> @@ -82,6 +83,7 @@ bool has_big_cores __ro_after_init;
>>   bool coregroup_enabled __ro_after_init;
>>   bool thread_group_shares_l2 __ro_after_init;
>>   bool thread_group_shares_l3 __ro_after_init;
>> +static int vp_manual_hint = NR_CPUS;
>>   
>>   DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map);
>>   DEFINE_PER_CPU(cpumask_var_t, cpu_smallcore_map);
>> @@ -1727,6 +1729,7 @@ static void __init build_sched_topology(void)
>>   	BUG_ON(i >= ARRAY_SIZE(powerpc_topology) - 1);
>>   
>>   	set_sched_topology(powerpc_topology);
>> +	vp_manual_hint = num_present_cpus();
>>   }
>>   
>>   void __init smp_cpus_done(unsigned int max_cpus)
>> @@ -1807,4 +1810,51 @@ void __noreturn arch_cpu_idle_dead(void)
>>   	start_secondary_resume();
>>   }
>>   
>> +/*
>> + * sysfs hint to mark CPUs as Avoid. This will help in restricting
>> + * the workload to specified number of CPUs.
>> + * For example 40 > vp_manual_hint means, workload will run on
>> + * 0-39 CPUs.
>> + */
>> +
>> +static int pv_vp_manual_hint_set(void *data, u64 val)
>> +{
>> +	int cpu;
>> +
>> +	if (val == 0 || vp_manual_hint > num_present_cpus())

This should be
	if (val == 0 || val > num_present_cpus())

>> +		vp_manual_hint = num_present_cpus();
>> +
>> +	if (val != vp_manual_hint)
>> +		vp_manual_hint = val;
> 
> This all is effectively just:
> 
> 	vp_manual_hint = val;
> 
> Isn't?

Yes, with some checks for sane values.

> 
>> +	if (vp_manual_hint < num_present_cpus())
>> +		static_branch_enable(&paravirt_cpu_avoid_enabled);
>> +	else
>> +		static_branch_disable(&paravirt_cpu_avoid_enabled);
>> +
>> +	for_each_present_cpu(cpu) {
>> +		if (cpu >= vp_manual_hint)
>> +			set_cpu_avoid(cpu, true);
>> +		else
>> +			set_cpu_avoid(cpu, false);
>> +	}
>> +	return 0;
>> +}
>> +
>> +static int pv_vp_manual_hint_get(void *data, u64 *val)
>> +{
>> +	*val = vp_manual_hint;
>> +	return 0;
>> +}
>> +
>> +DEFINE_SIMPLE_ATTRIBUTE(fops_pv_vp_manual_hint, pv_vp_manual_hint_get, pv_vp_manual_hint_set, "%llu\n");
>> +
>> +static __init int paravirt_debugfs_init(void)
>> +{
>> +	if (is_shared_processor())
>> +		debugfs_create_file("vp_manual_hint", 0600, arch_debugfs_dir, NULL, &fops_pv_vp_manual_hint);
>> +	return 0;
>> +}
>> +
>> +device_initcall(paravirt_debugfs_init)
>>   #endif
>> -- 
>> 2.43.0


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ