lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Thu, 7 Sep 2017 10:29:32 -0400
From:   Prarit Bhargava <prarit@...hat.com>
To:     Andi Kleen <andi@...stfloor.org>, x86@...nel.org
Cc:     kan.liang@...el.com, peterz@...radead.org,
        linux-kernel@...r.kernel.org, Andi Kleen <ak@...ux.intel.com>
Subject: Re: [PATCH 2/2] x86/topology: Avoid wasting 128k for package id array



On 09/06/2017 07:17 PM, Andi Kleen wrote:
> From: Andi Kleen <ak@...ux.intel.com>
> 
> I was looking at large early boot allocations and noticed that
> since (1f12e32f x86/topology: Create logical package id)
> every 64bit system allocates a 128k array to convert logical
> package ids.
> 
> This happens because the array is sized for
> MAX_LOCAL_APIC and that is always 32k on 64bit systems,
> and it needs 4 bytes for each entry.
> 
> This is fairly wasteful, especially for the common case
> of having only one socket, where we need 128K just to store
> a single 4 byte value.
> 
> The max logical APIC value is not known at this point,
> so it's hard to size it correctly.
> 
> The previous patch converted the only performance critical
> user to cache the value, and all others are fairly
> slow path, so we can just convert the O(1) array
> lookup to a linear search in cpu_data()
> 
> This can also avoid the need for an extra bitmap structure
> to know if the logical package ID is already allocated.
> We can also save this information in cpu_data and look it
> up during the search.
> 
> This patch removes the explicit arrays and replaces
> the lookups with explicit searches.
> 
> Overall the new code is somewhat simpler, and needs a lot
> less run time memory.
> 
>  arch/x86/include/asm/processor.h |  6 +++++-
>  arch/x86/kernel/smpboot.c        | 47 ++++++++++++++++-------------------------------
>  2 files changed, 21 insertions(+), 32 deletions(-)
> 
> The naming of the variables in cpu_data is still not
> great (_proc sometimes means package and sometimes means
> logical processor), but I followed the existing (messy)
> conventions when possible. At some point would be probably good
> to clean this up too.
> 
> Tested on a 2S system, but it would be good
> to test on more obscure systems which may have problems
> with package IDs. I'm copying Prarit who had problematic systems
> before.
> 
> Signed-off-by: Andi Kleen <ak@...ux.intel.com>
> ---
>  arch/x86/include/asm/processor.h |  6 ++++-
>  arch/x86/kernel/smpboot.c        | 47 ++++++++++++++--------------------------
>  2 files changed, 21 insertions(+), 32 deletions(-)
> 
> diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
> index 3fa26a61eabc..d369d2a82d8f 100644
> --- a/arch/x86/include/asm/processor.h
> +++ b/arch/x86/include/asm/processor.h
> @@ -124,13 +124,17 @@ struct cpuinfo_x86 {
>  	u16			booted_cores;
>  	/* Physical processor id: */
>  	u16			phys_proc_id;
> -	/* Logical processor id: */
> +	/* Logical processor (package) id: */
>  	u16			logical_proc_id;
> +	/* Physical package ID */
> +	u16			phys_pkg_id;
>  	/* Core id: */
>  	u16			cpu_core_id;
>  	/* Index into per_cpu list: */
>  	u16			cpu_index;
>  	u32			microcode;
> +	/* Flags */
> +	unsigned		logical_proc_set : 1;
>  } __randomize_layout;
>  
>  struct cpuid_regs {
> diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
> index 54b9e89d4d6b..e78460aeca0a 100644
> --- a/arch/x86/kernel/smpboot.c
> +++ b/arch/x86/kernel/smpboot.c
> @@ -100,9 +100,6 @@ DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
>  EXPORT_PER_CPU_SYMBOL(cpu_info);
>  
>  /* Logical package management. We might want to allocate that dynamically */
> -static int *physical_to_logical_pkg __read_mostly;
> -static unsigned long *physical_package_map __read_mostly;;
> -static unsigned int max_physical_pkg_id __read_mostly;
>  unsigned int __max_logical_packages __read_mostly;
>  EXPORT_SYMBOL(__max_logical_packages);
>  static unsigned int logical_packages __read_mostly;
> @@ -282,17 +279,12 @@ static void notrace start_secondary(void *unused)
>   */
>  int topology_update_package_map(unsigned int pkg, unsigned int cpu)
>  {
> -	unsigned int new;
> +	int new;
>  
> -	/* Called from early boot ? */
> -	if (!physical_package_map)
> -		return 0;
>  
> -	if (pkg >= max_physical_pkg_id)
> -		return -EINVAL;
> -
> -	/* Set the logical package id */
> -	if (test_and_set_bit(pkg, physical_package_map))
> +	/* Already available somewhere? */
> +	new = topology_phys_to_logical_pkg(pkg);
> +	if (new >= 0)
>  		goto found;
>  
>  	if (logical_packages >= __max_logical_packages) {
> @@ -306,10 +298,11 @@ int topology_update_package_map(unsigned int pkg, unsigned int cpu)
>  		pr_info("CPU %u Converting physical %u to logical package %u\n",
>  			cpu, pkg, new);
>  	}
> -	physical_to_logical_pkg[pkg] = new;
>  
>  found:
> -	cpu_data(cpu).logical_proc_id = physical_to_logical_pkg[pkg];
> +	cpu_data(cpu).phys_pkg_id = pkg;
> +	cpu_data(cpu).logical_proc_id = new;
> +	cpu_data(cpu).logical_proc_set = 1;
>  	return 0;
>  }
>  
> @@ -320,16 +313,21 @@ int topology_update_package_map(unsigned int pkg, unsigned int cpu)
>   */
>  int topology_phys_to_logical_pkg(unsigned int phys_pkg)
>  {
> -	if (phys_pkg >= max_physical_pkg_id)
> -		return -1;
> -	return physical_to_logical_pkg[phys_pkg];
> +	int cpu;
> +
> +	for_each_possible_cpu (cpu) {
> +		if (cpu_data(cpu).phys_pkg_id == phys_pkg &&
> +		    cpu_data(cpu).logical_proc_set) {
> +			return cpu_data(cpu).logical_proc_id;
> +		}
> +	}
> +	return -1;
>  }
>  EXPORT_SYMBOL(topology_phys_to_logical_pkg);
>  
>  static void __init smp_init_package_map(struct cpuinfo_x86 *c, unsigned int cpu)
>  {
>  	unsigned int ncpus;
> -	size_t size;
>  
>  	/*
>  	 * Today neither Intel nor AMD support heterogenous systems. That
> @@ -362,19 +360,6 @@ static void __init smp_init_package_map(struct cpuinfo_x86 *c, unsigned int cpu)
>  	__max_logical_packages = DIV_ROUND_UP(total_cpus, ncpus);
>  	logical_packages = 0;
>  
> -	/*
> -	 * Possibly larger than what we need as the number of apic ids per
> -	 * package can be smaller than the actual used apic ids.
> -	 */
> -	max_physical_pkg_id = DIV_ROUND_UP(MAX_LOCAL_APIC, ncpus);
> -	size = max_physical_pkg_id * sizeof(unsigned int);
> -	physical_to_logical_pkg = kmalloc(size, GFP_KERNEL);
> -	memset(physical_to_logical_pkg, 0xff, size);
> -	size = BITS_TO_LONGS(max_physical_pkg_id) * sizeof(unsigned long);
> -	physical_package_map = kzalloc(size, GFP_KERNEL);
> -
> -	pr_info("Max logical packages: %u\n", __max_logical_packages);

^^ This pr_info needs to stay IMO.  ... testing now.

P.

> -
>  	topology_update_package_map(c->phys_proc_id, cpu);
>  }
>  
> 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ