lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <alpine.LFD.2.21.1804160453140.16188@casper.infradead.org>
Date:   Mon, 16 Apr 2018 04:53:28 +0100 (BST)
From:   James Simmons <jsimmons@...radead.org>
To:     NeilBrown <neilb@...e.com>
cc:     Oleg Drokin <oleg.drokin@...el.com>,
        Greg Kroah-Hartman <gregkh@...uxfoundation.org>,
        Andreas Dilger <andreas.dilger@...el.com>,
        Linux Kernel Mailing List <linux-kernel@...r.kernel.org>,
        Lustre Development List <lustre-devel@...ts.lustre.org>
Subject: Re: [PATCH 4/6] staging: lustre: rearrange placement of CPU partition
 management code.


> Currently the code for cpu-partition tables lives in various places.
> The non-SMP code is partly in libcfs/libcfs_cpu.h as static inlines,
> and partly in lnet/libcfs/libcfs_cpu.c - some of the functions are
> tiny and could well be inlines.
> 
> The SMP code is all in lnet/libcfs/linux/linux-cpu.c.
> 
> This patch moves all the trivial non-SMP functions into
> libcfs_cpu.h as inlines, and all the SMP functions into libcfs_cpu.c
> with the non-trivial !SMP code.
> 
> Now when you go looking for some function, it is easier to find both
> versions together when neither is trivial.
> 
> There is no code change here - just code movement.
> 
> Signed-off-by: NeilBrown <neilb@...e.com>

Nak. SMP will be reworked.

> ---
>  .../lustre/include/linux/libcfs/libcfs_cpu.h       |  173 +++
>  drivers/staging/lustre/lnet/libcfs/Makefile        |    1 
>  drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c    |  959 +++++++++++++++++-
>  .../staging/lustre/lnet/libcfs/linux/linux-cpu.c   | 1079 --------------------
>  4 files changed, 1076 insertions(+), 1136 deletions(-)
>  delete mode 100644 drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
> 
> diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h
> index 829c35e68db8..813ba4564bb9 100644
> --- a/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h
> +++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h
> @@ -117,41 +117,6 @@ cpumask_var_t *cfs_cpt_cpumask(struct cfs_cpt_table *cptab, int cpt);
>   * print string information of cpt-table
>   */
>  int cfs_cpt_table_print(struct cfs_cpt_table *cptab, char *buf, int len);
> -#else /* !CONFIG_SMP */
> -struct cfs_cpt_table {
> -	/* # of CPU partitions */
> -	int			ctb_nparts;
> -	/* cpu mask */
> -	cpumask_t		ctb_mask;
> -	/* node mask */
> -	nodemask_t		ctb_nodemask;
> -	/* version */
> -	u64			ctb_version;
> -};
> -
> -static inline cpumask_var_t *
> -cfs_cpt_cpumask(struct cfs_cpt_table *cptab, int cpt)
> -{
> -	return NULL;
> -}
> -
> -static inline int
> -cfs_cpt_table_print(struct cfs_cpt_table *cptab, char *buf, int len)
> -{
> -	return 0;
> -}
> -#endif /* CONFIG_SMP */
> -
> -extern struct cfs_cpt_table	*cfs_cpt_table;
> -
> -/**
> - * destroy a CPU partition table
> - */
> -void cfs_cpt_table_free(struct cfs_cpt_table *cptab);
> -/**
> - * create a cfs_cpt_table with \a ncpt number of partitions
> - */
> -struct cfs_cpt_table *cfs_cpt_table_alloc(unsigned int ncpt);
>  /**
>   * return total number of CPU partitions in \a cptab
>   */
> @@ -237,6 +202,144 @@ int cfs_cpt_spread_node(struct cfs_cpt_table *cptab, int cpt);
>   */
>  int cfs_cpu_ht_nsiblings(int cpu);
>  
> +#else /* !CONFIG_SMP */
> +struct cfs_cpt_table {
> +	/* # of CPU partitions */
> +	int			ctb_nparts;
> +	/* cpu mask */
> +	cpumask_t		ctb_mask;
> +	/* node mask */
> +	nodemask_t		ctb_nodemask;
> +	/* version */
> +	u64			ctb_version;
> +};
> +
> +static inline cpumask_var_t *
> +cfs_cpt_cpumask(struct cfs_cpt_table *cptab, int cpt)
> +{
> +	return NULL;
> +}
> +
> +static inline int
> +cfs_cpt_table_print(struct cfs_cpt_table *cptab, char *buf, int len)
> +{
> +	return 0;
> +}
> +static inline int
> +cfs_cpt_number(struct cfs_cpt_table *cptab)
> +{
> +	return 1;
> +}
> +
> +static inline int
> +cfs_cpt_weight(struct cfs_cpt_table *cptab, int cpt)
> +{
> +	return 1;
> +}
> +
> +static inline int
> +cfs_cpt_online(struct cfs_cpt_table *cptab, int cpt)
> +{
> +	return 1;
> +}
> +
> +static inline nodemask_t *
> +cfs_cpt_nodemask(struct cfs_cpt_table *cptab, int cpt)
> +{
> +	return &cptab->ctb_nodemask;
> +}
> +
> +static inline int
> +cfs_cpt_set_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu)
> +{
> +	return 1;
> +}
> +
> +static inline void
> +cfs_cpt_unset_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu)
> +{
> +}
> +
> +static inline int
> +cfs_cpt_set_cpumask(struct cfs_cpt_table *cptab, int cpt, cpumask_t *mask)
> +{
> +	return 1;
> +}
> +
> +static inline void
> +cfs_cpt_unset_cpumask(struct cfs_cpt_table *cptab, int cpt, cpumask_t *mask)
> +{
> +}
> +
> +static inline int
> +cfs_cpt_set_node(struct cfs_cpt_table *cptab, int cpt, int node)
> +{
> +	return 1;
> +}
> +
> +static inline void
> +cfs_cpt_unset_node(struct cfs_cpt_table *cptab, int cpt, int node)
> +{
> +}
> +
> +static inline int
> +cfs_cpt_set_nodemask(struct cfs_cpt_table *cptab, int cpt, nodemask_t *mask)
> +{
> +	return 1;
> +}
> +
> +static inline void
> +cfs_cpt_unset_nodemask(struct cfs_cpt_table *cptab, int cpt, nodemask_t *mask)
> +{
> +}
> +
> +static inline void
> +cfs_cpt_clear(struct cfs_cpt_table *cptab, int cpt)
> +{
> +}
> +
> +static inline int
> +cfs_cpt_spread_node(struct cfs_cpt_table *cptab, int cpt)
> +{
> +	return 0;
> +}
> +
> +static inline int
> +cfs_cpu_ht_nsiblings(int cpu)
> +{
> +	return 1;
> +}
> +
> +static inline int
> +cfs_cpt_current(struct cfs_cpt_table *cptab, int remap)
> +{
> +	return 0;
> +}
> +
> +static inline int
> +cfs_cpt_of_cpu(struct cfs_cpt_table *cptab, int cpu)
> +{
> +	return 0;
> +}
> +
> +static inline int
> +cfs_cpt_bind(struct cfs_cpt_table *cptab, int cpt)
> +{
> +	return 0;
> +}
> +#endif /* CONFIG_SMP */
> +
> +extern struct cfs_cpt_table	*cfs_cpt_table;
> +
> +/**
> + * destroy a CPU partition table
> + */
> +void cfs_cpt_table_free(struct cfs_cpt_table *cptab);
> +/**
> + * create a cfs_cpt_table with \a ncpt number of partitions
> + */
> +struct cfs_cpt_table *cfs_cpt_table_alloc(unsigned int ncpt);
> +
>  /*
>   * allocate per-cpu-partition data, returned value is an array of pointers,
>   * variable can be indexed by CPU ID.
> diff --git a/drivers/staging/lustre/lnet/libcfs/Makefile b/drivers/staging/lustre/lnet/libcfs/Makefile
> index 36b49a6b7b88..673fe348c445 100644
> --- a/drivers/staging/lustre/lnet/libcfs/Makefile
> +++ b/drivers/staging/lustre/lnet/libcfs/Makefile
> @@ -5,7 +5,6 @@ subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/lustre/include
>  obj-$(CONFIG_LNET) += libcfs.o
>  
>  libcfs-linux-objs := linux-tracefile.o linux-debug.o
> -libcfs-linux-objs += linux-cpu.o
>  libcfs-linux-objs += linux-module.o
>  libcfs-linux-objs += linux-crypto.o
>  libcfs-linux-objs += linux-crypto-adler.o
> diff --git a/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c b/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
> index 5818f641455f..ac6fd11ae9d6 100644
> --- a/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
> +++ b/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
> @@ -36,11 +36,110 @@
>  /** Global CPU partition table */
>  struct cfs_cpt_table   *cfs_cpt_table __read_mostly;
>  EXPORT_SYMBOL(cfs_cpt_table);
> +#define DEBUG_SUBSYSTEM S_LNET
> +
> +#include <linux/cpu.h>
> +#include <linux/sched.h>
> +#include <linux/libcfs/libcfs.h>
> +
> +#ifdef CONFIG_SMP
> +/**
> + * modparam for setting number of partitions
> + *
> + *  0 : estimate best value based on cores or NUMA nodes
> + *  1 : disable multiple partitions
> + * >1 : specify number of partitions
> + */
> +static int	cpu_npartitions;
> +module_param(cpu_npartitions, int, 0444);
> +MODULE_PARM_DESC(cpu_npartitions, "# of CPU partitions");
> +
> +/**
> + * modparam for setting CPU partitions patterns:
> + *
> + * i.e: "0[0,1,2,3] 1[4,5,6,7]", number before bracket is CPU partition ID,
> + *      number in bracket is processor ID (core or HT)
> + *
> + * i.e: "N 0[0,1] 1[2,3]" the first character 'N' means numbers in bracket
> + *       are NUMA node ID, number before bracket is CPU partition ID.
> + *
> + * i.e: "N", shortcut expression to create CPT from NUMA & CPU topology
> + *
> + * NB: If user specified cpu_pattern, cpu_npartitions will be ignored
> + */
> +static char	*cpu_pattern = "N";
> +module_param(cpu_pattern, charp, 0444);
> +MODULE_PARM_DESC(cpu_pattern, "CPU partitions pattern");
>  
> -#ifndef CONFIG_SMP
> +static struct cfs_cpt_data {
> +	/* serialize hotplug etc */
> +	spinlock_t		cpt_lock;
> +	/* reserved for hotplug */
> +	unsigned long		cpt_version;
> +	/* mutex to protect cpt_cpumask */
> +	struct mutex		cpt_mutex;
> +	/* scratch buffer for set/unset_node */
> +	cpumask_var_t		cpt_cpumask;
> +} cpt_data;
> +#endif
>  
>  #define CFS_CPU_VERSION_MAGIC	   0xbabecafe
>  
> +#ifdef CONFIG_SMP
> +struct cfs_cpt_table *
> +cfs_cpt_table_alloc(unsigned int ncpt)
> +{
> +	struct cfs_cpt_table *cptab;
> +	int i;
> +
> +	cptab = kzalloc(sizeof(*cptab), GFP_NOFS);
> +	if (!cptab)
> +		return NULL;
> +
> +	cptab->ctb_nparts = ncpt;
> +
> +	cptab->ctb_nodemask = kzalloc(sizeof(*cptab->ctb_nodemask),
> +				      GFP_NOFS);
> +	if (!zalloc_cpumask_var(&cptab->ctb_cpumask, GFP_NOFS) ||
> +	    !cptab->ctb_nodemask)
> +		goto failed;
> +
> +	cptab->ctb_cpu2cpt = kvmalloc_array(num_possible_cpus(),
> +					    sizeof(cptab->ctb_cpu2cpt[0]),
> +					    GFP_KERNEL);
> +	if (!cptab->ctb_cpu2cpt)
> +		goto failed;
> +
> +	memset(cptab->ctb_cpu2cpt, -1,
> +	       num_possible_cpus() * sizeof(cptab->ctb_cpu2cpt[0]));
> +
> +	cptab->ctb_parts = kvmalloc_array(ncpt, sizeof(cptab->ctb_parts[0]),
> +					  GFP_KERNEL);
> +	if (!cptab->ctb_parts)
> +		goto failed;
> +
> +	for (i = 0; i < ncpt; i++) {
> +		struct cfs_cpu_partition *part = &cptab->ctb_parts[i];
> +
> +		part->cpt_nodemask = kzalloc(sizeof(*part->cpt_nodemask),
> +					     GFP_NOFS);
> +		if (!zalloc_cpumask_var(&part->cpt_cpumask, GFP_NOFS) ||
> +		    !part->cpt_nodemask)
> +			goto failed;
> +	}
> +
> +	spin_lock(&cpt_data.cpt_lock);
> +	/* Reserved for hotplug */
> +	cptab->ctb_version = cpt_data.cpt_version;
> +	spin_unlock(&cpt_data.cpt_lock);
> +
> +	return cptab;
> +
> + failed:
> +	cfs_cpt_table_free(cptab);
> +	return NULL;
> +}
> +#else /* ! CONFIG_SMP */
>  struct cfs_cpt_table *
>  cfs_cpt_table_alloc(unsigned int ncpt)
>  {
> @@ -60,8 +159,32 @@ cfs_cpt_table_alloc(unsigned int ncpt)
>  
>  	return cptab;
>  }
> +#endif /* CONFIG_SMP */
>  EXPORT_SYMBOL(cfs_cpt_table_alloc);
>  
> +#ifdef CONFIG_SMP
> +void
> +cfs_cpt_table_free(struct cfs_cpt_table *cptab)
> +{
> +	int i;
> +
> +	kvfree(cptab->ctb_cpu2cpt);
> +
> +	for (i = 0; cptab->ctb_parts && i < cptab->ctb_nparts; i++) {
> +		struct cfs_cpu_partition *part = &cptab->ctb_parts[i];
> +
> +		kfree(part->cpt_nodemask);
> +		free_cpumask_var(part->cpt_cpumask);
> +	}
> +
> +	kvfree(cptab->ctb_parts);
> +
> +	kfree(cptab->ctb_nodemask);
> +	free_cpumask_var(cptab->ctb_cpumask);
> +
> +	kfree(cptab);
> +}
> +#else /* ! CONFIG_SMP */
>  void
>  cfs_cpt_table_free(struct cfs_cpt_table *cptab)
>  {
> @@ -69,55 +192,153 @@ cfs_cpt_table_free(struct cfs_cpt_table *cptab)
>  
>  	kfree(cptab);
>  }
> +#endif /* CONFIG_SMP */
>  EXPORT_SYMBOL(cfs_cpt_table_free);
>  
>  #ifdef CONFIG_SMP
>  int
>  cfs_cpt_table_print(struct cfs_cpt_table *cptab, char *buf, int len)
>  {
> -	int rc;
> +	char *tmp = buf;
> +	int rc = 0;
> +	int i;
> +	int j;
>  
> -	rc = snprintf(buf, len, "%d\t: %d\n", 0, 0);
> -	len -= rc;
> -	if (len <= 0)
> -		return -EFBIG;
> +	for (i = 0; i < cptab->ctb_nparts; i++) {
> +		if (len > 0) {
> +			rc = snprintf(tmp, len, "%d\t: ", i);
> +			len -= rc;
> +		}
>  
> -	return rc;
> +		if (len <= 0) {
> +			rc = -EFBIG;
> +			goto out;
> +		}
> +
> +		tmp += rc;
> +		for_each_cpu(j, cptab->ctb_parts[i].cpt_cpumask) {
> +			rc = snprintf(tmp, len, "%d ", j);
> +			len -= rc;
> +			if (len <= 0) {
> +				rc = -EFBIG;
> +				goto out;
> +			}
> +			tmp += rc;
> +		}
> +
> +		*tmp = '\n';
> +		tmp++;
> +		len--;
> +	}
> +
> + out:
> +	if (rc < 0)
> +		return rc;
> +
> +	return tmp - buf;
>  }
>  EXPORT_SYMBOL(cfs_cpt_table_print);
>  #endif /* CONFIG_SMP */
>  
> +#ifdef CONFIG_SMP
> +static void
> +cfs_node_to_cpumask(int node, cpumask_t *mask)
> +{
> +	const cpumask_t *tmp = cpumask_of_node(node);
> +
> +	if (tmp)
> +		cpumask_copy(mask, tmp);
> +	else
> +		cpumask_clear(mask);
> +}
> +
>  int
>  cfs_cpt_number(struct cfs_cpt_table *cptab)
>  {
> -	return 1;
> +	return cptab->ctb_nparts;
>  }
>  EXPORT_SYMBOL(cfs_cpt_number);
>  
>  int
>  cfs_cpt_weight(struct cfs_cpt_table *cptab, int cpt)
>  {
> -	return 1;
> +	LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
> +
> +	return cpt == CFS_CPT_ANY ?
> +	       cpumask_weight(cptab->ctb_cpumask) :
> +	       cpumask_weight(cptab->ctb_parts[cpt].cpt_cpumask);
>  }
>  EXPORT_SYMBOL(cfs_cpt_weight);
>  
>  int
>  cfs_cpt_online(struct cfs_cpt_table *cptab, int cpt)
>  {
> -	return 1;
> +	LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
> +
> +	return cpt == CFS_CPT_ANY ?
> +	       cpumask_any_and(cptab->ctb_cpumask,
> +			       cpu_online_mask) < nr_cpu_ids :
> +	       cpumask_any_and(cptab->ctb_parts[cpt].cpt_cpumask,
> +			       cpu_online_mask) < nr_cpu_ids;
>  }
>  EXPORT_SYMBOL(cfs_cpt_online);
>  
> +cpumask_var_t *
> +cfs_cpt_cpumask(struct cfs_cpt_table *cptab, int cpt)
> +{
> +	LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
> +
> +	return cpt == CFS_CPT_ANY ?
> +	       &cptab->ctb_cpumask : &cptab->ctb_parts[cpt].cpt_cpumask;
> +}
> +EXPORT_SYMBOL(cfs_cpt_cpumask);
> +
>  nodemask_t *
>  cfs_cpt_nodemask(struct cfs_cpt_table *cptab, int cpt)
>  {
> -	return &cptab->ctb_nodemask;
> +	LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
> +
> +	return cpt == CFS_CPT_ANY ?
> +	       cptab->ctb_nodemask : cptab->ctb_parts[cpt].cpt_nodemask;
>  }
>  EXPORT_SYMBOL(cfs_cpt_nodemask);
>  
>  int
>  cfs_cpt_set_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu)
>  {
> +	int node;
> +
> +	LASSERT(cpt >= 0 && cpt < cptab->ctb_nparts);
> +
> +	if (cpu < 0 || cpu >= nr_cpu_ids || !cpu_online(cpu)) {
> +		CDEBUG(D_INFO, "CPU %d is invalid or it's offline\n", cpu);
> +		return 0;
> +	}
> +
> +	if (cptab->ctb_cpu2cpt[cpu] != -1) {
> +		CDEBUG(D_INFO, "CPU %d is already in partition %d\n",
> +		       cpu, cptab->ctb_cpu2cpt[cpu]);
> +		return 0;
> +	}
> +
> +	cptab->ctb_cpu2cpt[cpu] = cpt;
> +
> +	LASSERT(!cpumask_test_cpu(cpu, cptab->ctb_cpumask));
> +	LASSERT(!cpumask_test_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask));
> +
> +	cpumask_set_cpu(cpu, cptab->ctb_cpumask);
> +	cpumask_set_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask);
> +
> +	node = cpu_to_node(cpu);
> +
> +	/* first CPU of @node in this CPT table */
> +	if (!node_isset(node, *cptab->ctb_nodemask))
> +		node_set(node, *cptab->ctb_nodemask);
> +
> +	/* first CPU of @node in this partition */
> +	if (!node_isset(node, *cptab->ctb_parts[cpt].cpt_nodemask))
> +		node_set(node, *cptab->ctb_parts[cpt].cpt_nodemask);
> +
>  	return 1;
>  }
>  EXPORT_SYMBOL(cfs_cpt_set_cpu);
> @@ -125,12 +346,80 @@ EXPORT_SYMBOL(cfs_cpt_set_cpu);
>  void
>  cfs_cpt_unset_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu)
>  {
> +	int node;
> +	int i;
> +
> +	LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
> +
> +	if (cpu < 0 || cpu >= nr_cpu_ids) {
> +		CDEBUG(D_INFO, "Invalid CPU id %d\n", cpu);
> +		return;
> +	}
> +
> +	if (cpt == CFS_CPT_ANY) {
> +		/* caller doesn't know the partition ID */
> +		cpt = cptab->ctb_cpu2cpt[cpu];
> +		if (cpt < 0) { /* not set in this CPT-table */
> +			CDEBUG(D_INFO, "Try to unset cpu %d which is not in CPT-table %p\n",
> +			       cpt, cptab);
> +			return;
> +		}
> +
> +	} else if (cpt != cptab->ctb_cpu2cpt[cpu]) {
> +		CDEBUG(D_INFO,
> +		       "CPU %d is not in cpu-partition %d\n", cpu, cpt);
> +		return;
> +	}
> +
> +	LASSERT(cpumask_test_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask));
> +	LASSERT(cpumask_test_cpu(cpu, cptab->ctb_cpumask));
> +
> +	cpumask_clear_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask);
> +	cpumask_clear_cpu(cpu, cptab->ctb_cpumask);
> +	cptab->ctb_cpu2cpt[cpu] = -1;
> +
> +	node = cpu_to_node(cpu);
> +
> +	LASSERT(node_isset(node, *cptab->ctb_parts[cpt].cpt_nodemask));
> +	LASSERT(node_isset(node, *cptab->ctb_nodemask));
> +
> +	for_each_cpu(i, cptab->ctb_parts[cpt].cpt_cpumask) {
> +		/* this CPT has other CPU belonging to this node? */
> +		if (cpu_to_node(i) == node)
> +			break;
> +	}
> +
> +	if (i >= nr_cpu_ids)
> +		node_clear(node, *cptab->ctb_parts[cpt].cpt_nodemask);
> +
> +	for_each_cpu(i, cptab->ctb_cpumask) {
> +		/* this CPT-table has other CPU belonging to this node? */
> +		if (cpu_to_node(i) == node)
> +			break;
> +	}
> +
> +	if (i >= nr_cpu_ids)
> +		node_clear(node, *cptab->ctb_nodemask);
>  }
>  EXPORT_SYMBOL(cfs_cpt_unset_cpu);
>  
>  int
>  cfs_cpt_set_cpumask(struct cfs_cpt_table *cptab, int cpt, cpumask_t *mask)
>  {
> +	int i;
> +
> +	if (!cpumask_weight(mask) ||
> +	    cpumask_any_and(mask, cpu_online_mask) >= nr_cpu_ids) {
> +		CDEBUG(D_INFO, "No online CPU is found in the CPU mask for CPU partition %d\n",
> +		       cpt);
> +		return 0;
> +	}
> +
> +	for_each_cpu(i, mask) {
> +		if (!cfs_cpt_set_cpu(cptab, cpt, i))
> +			return 0;
> +	}
> +
>  	return 1;
>  }
>  EXPORT_SYMBOL(cfs_cpt_set_cpumask);
> @@ -138,25 +427,65 @@ EXPORT_SYMBOL(cfs_cpt_set_cpumask);
>  void
>  cfs_cpt_unset_cpumask(struct cfs_cpt_table *cptab, int cpt, cpumask_t *mask)
>  {
> +	int i;
> +
> +	for_each_cpu(i, mask)
> +		cfs_cpt_unset_cpu(cptab, cpt, i);
>  }
>  EXPORT_SYMBOL(cfs_cpt_unset_cpumask);
>  
>  int
>  cfs_cpt_set_node(struct cfs_cpt_table *cptab, int cpt, int node)
>  {
> -	return 1;
> +	int rc;
> +
> +	if (node < 0 || node >= MAX_NUMNODES) {
> +		CDEBUG(D_INFO,
> +		       "Invalid NUMA id %d for CPU partition %d\n", node, cpt);
> +		return 0;
> +	}
> +
> +	mutex_lock(&cpt_data.cpt_mutex);
> +
> +	cfs_node_to_cpumask(node, cpt_data.cpt_cpumask);
> +
> +	rc = cfs_cpt_set_cpumask(cptab, cpt, cpt_data.cpt_cpumask);
> +
> +	mutex_unlock(&cpt_data.cpt_mutex);
> +
> +	return rc;
>  }
>  EXPORT_SYMBOL(cfs_cpt_set_node);
>  
>  void
>  cfs_cpt_unset_node(struct cfs_cpt_table *cptab, int cpt, int node)
>  {
> +	if (node < 0 || node >= MAX_NUMNODES) {
> +		CDEBUG(D_INFO,
> +		       "Invalid NUMA id %d for CPU partition %d\n", node, cpt);
> +		return;
> +	}
> +
> +	mutex_lock(&cpt_data.cpt_mutex);
> +
> +	cfs_node_to_cpumask(node, cpt_data.cpt_cpumask);
> +
> +	cfs_cpt_unset_cpumask(cptab, cpt, cpt_data.cpt_cpumask);
> +
> +	mutex_unlock(&cpt_data.cpt_mutex);
>  }
>  EXPORT_SYMBOL(cfs_cpt_unset_node);
>  
>  int
>  cfs_cpt_set_nodemask(struct cfs_cpt_table *cptab, int cpt, nodemask_t *mask)
>  {
> +	int i;
> +
> +	for_each_node_mask(i, *mask) {
> +		if (!cfs_cpt_set_node(cptab, cpt, i))
> +			return 0;
> +	}
> +
>  	return 1;
>  }
>  EXPORT_SYMBOL(cfs_cpt_set_nodemask);
> @@ -164,50 +493,638 @@ EXPORT_SYMBOL(cfs_cpt_set_nodemask);
>  void
>  cfs_cpt_unset_nodemask(struct cfs_cpt_table *cptab, int cpt, nodemask_t *mask)
>  {
> +	int i;
> +
> +	for_each_node_mask(i, *mask)
> +		cfs_cpt_unset_node(cptab, cpt, i);
>  }
>  EXPORT_SYMBOL(cfs_cpt_unset_nodemask);
>  
>  void
>  cfs_cpt_clear(struct cfs_cpt_table *cptab, int cpt)
>  {
> +	int last;
> +	int i;
> +
> +	if (cpt == CFS_CPT_ANY) {
> +		last = cptab->ctb_nparts - 1;
> +		cpt = 0;
> +	} else {
> +		last = cpt;
> +	}
> +
> +	for (; cpt <= last; cpt++) {
> +		for_each_cpu(i, cptab->ctb_parts[cpt].cpt_cpumask)
> +			cfs_cpt_unset_cpu(cptab, cpt, i);
> +	}
>  }
>  EXPORT_SYMBOL(cfs_cpt_clear);
>  
>  int
>  cfs_cpt_spread_node(struct cfs_cpt_table *cptab, int cpt)
>  {
> +	nodemask_t *mask;
> +	int weight;
> +	int rotor;
> +	int node;
> +
> +	/* convert CPU partition ID to HW node id */
> +
> +	if (cpt < 0 || cpt >= cptab->ctb_nparts) {
> +		mask = cptab->ctb_nodemask;
> +		rotor = cptab->ctb_spread_rotor++;
> +	} else {
> +		mask = cptab->ctb_parts[cpt].cpt_nodemask;
> +		rotor = cptab->ctb_parts[cpt].cpt_spread_rotor++;
> +	}
> +
> +	weight = nodes_weight(*mask);
> +	LASSERT(weight > 0);
> +
> +	rotor %= weight;
> +
> +	for_each_node_mask(node, *mask) {
> +		if (!rotor--)
> +			return node;
> +	}
> +
> +	LBUG();
>  	return 0;
>  }
>  EXPORT_SYMBOL(cfs_cpt_spread_node);
>  
> -int
> -cfs_cpu_ht_nsiblings(int cpu)
> -{
> -	return 1;
> -}
> -EXPORT_SYMBOL(cfs_cpu_ht_nsiblings);
> -
>  int
>  cfs_cpt_current(struct cfs_cpt_table *cptab, int remap)
>  {
> -	return 0;
> +	int cpu;
> +	int cpt;
> +
> +	preempt_disable();
> +	cpu = smp_processor_id();
> +	cpt = cptab->ctb_cpu2cpt[cpu];
> +
> +	if (cpt < 0 && remap) {
> +		/* don't return negative value for safety of upper layer,
> +		 * instead we shadow the unknown cpu to a valid partition ID
> +		 */
> +		cpt = cpu % cptab->ctb_nparts;
> +	}
> +	preempt_enable();
> +	return cpt;
>  }
>  EXPORT_SYMBOL(cfs_cpt_current);
>  
>  int
>  cfs_cpt_of_cpu(struct cfs_cpt_table *cptab, int cpu)
>  {
> -	return 0;
> +	LASSERT(cpu >= 0 && cpu < nr_cpu_ids);
> +
> +	return cptab->ctb_cpu2cpt[cpu];
>  }
>  EXPORT_SYMBOL(cfs_cpt_of_cpu);
>  
>  int
>  cfs_cpt_bind(struct cfs_cpt_table *cptab, int cpt)
>  {
> +	cpumask_var_t *cpumask;
> +	nodemask_t *nodemask;
> +	int rc;
> +	int i;
> +
> +	LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
> +
> +	if (cpt == CFS_CPT_ANY) {
> +		cpumask = &cptab->ctb_cpumask;
> +		nodemask = cptab->ctb_nodemask;
> +	} else {
> +		cpumask = &cptab->ctb_parts[cpt].cpt_cpumask;
> +		nodemask = cptab->ctb_parts[cpt].cpt_nodemask;
> +	}
> +
> +	if (cpumask_any_and(*cpumask, cpu_online_mask) >= nr_cpu_ids) {
> +		CERROR("No online CPU found in CPU partition %d, did someone do CPU hotplug on system? You might need to reload Lustre modules to keep system working well.\n",
> +		       cpt);
> +		return -EINVAL;
> +	}
> +
> +	for_each_online_cpu(i) {
> +		if (cpumask_test_cpu(i, *cpumask))
> +			continue;
> +
> +		rc = set_cpus_allowed_ptr(current, *cpumask);
> +		set_mems_allowed(*nodemask);
> +		if (!rc)
> +			schedule(); /* switch to allowed CPU */
> +
> +		return rc;
> +	}
> +
> +	/* don't need to set affinity because all online CPUs are covered */
>  	return 0;
>  }
>  EXPORT_SYMBOL(cfs_cpt_bind);
>  
> +#endif
> +
> +#ifdef CONFIG_SMP
> +
> +/**
> + * Choose max to \a number CPUs from \a node and set them in \a cpt.
> + * We always prefer to choose CPU in the same core/socket.
> + */
> +static int
> +cfs_cpt_choose_ncpus(struct cfs_cpt_table *cptab, int cpt,
> +		     cpumask_t *node, int number)
> +{
> +	cpumask_var_t socket;
> +	cpumask_var_t core;
> +	int rc = 0;
> +	int cpu;
> +
> +	LASSERT(number > 0);
> +
> +	if (number >= cpumask_weight(node)) {
> +		while (!cpumask_empty(node)) {
> +			cpu = cpumask_first(node);
> +
> +			rc = cfs_cpt_set_cpu(cptab, cpt, cpu);
> +			if (!rc)
> +				return -EINVAL;
> +			cpumask_clear_cpu(cpu, node);
> +		}
> +		return 0;
> +	}
> +
> +	/*
> +	 * Allocate scratch buffers
> +	 * As we cannot initialize a cpumask_var_t, we need
> +	 * to alloc both before we can risk trying to free either
> +	 */
> +	if (!zalloc_cpumask_var(&socket, GFP_NOFS))
> +		rc = -ENOMEM;
> +	if (!zalloc_cpumask_var(&core, GFP_NOFS))
> +		rc = -ENOMEM;
> +	if (rc)
> +		goto out;
> +
> +	while (!cpumask_empty(node)) {
> +		cpu = cpumask_first(node);
> +
> +		/* get cpumask for cores in the same socket */
> +		cpumask_copy(socket, topology_core_cpumask(cpu));
> +		cpumask_and(socket, socket, node);
> +
> +		LASSERT(!cpumask_empty(socket));
> +
> +		while (!cpumask_empty(socket)) {
> +			int i;
> +
> +			/* get cpumask for hts in the same core */
> +			cpumask_copy(core, topology_sibling_cpumask(cpu));
> +			cpumask_and(core, core, node);
> +
> +			LASSERT(!cpumask_empty(core));
> +
> +			for_each_cpu(i, core) {
> +				cpumask_clear_cpu(i, socket);
> +				cpumask_clear_cpu(i, node);
> +
> +				rc = cfs_cpt_set_cpu(cptab, cpt, i);
> +				if (!rc) {
> +					rc = -EINVAL;
> +					goto out;
> +				}
> +
> +				if (!--number)
> +					goto out;
> +			}
> +			cpu = cpumask_first(socket);
> +		}
> +	}
> +
> +out:
> +	free_cpumask_var(socket);
> +	free_cpumask_var(core);
> +	return rc;
> +}
> +
> +#define CPT_WEIGHT_MIN  4u
> +
> +static unsigned int
> +cfs_cpt_num_estimate(void)
> +{
> +	unsigned int nnode = num_online_nodes();
> +	unsigned int ncpu = num_online_cpus();
> +	unsigned int ncpt;
> +
> +	if (ncpu <= CPT_WEIGHT_MIN) {
> +		ncpt = 1;
> +		goto out;
> +	}
> +
> +	/* generate reasonable number of CPU partitions based on total number
> +	 * of CPUs, Preferred N should be power2 and match this condition:
> +	 * 2 * (N - 1)^2 < NCPUS <= 2 * N^2
> +	 */
> +	for (ncpt = 2; ncpu > 2 * ncpt * ncpt; ncpt <<= 1)
> +		;
> +
> +	if (ncpt <= nnode) { /* fat numa system */
> +		while (nnode > ncpt)
> +			nnode >>= 1;
> +
> +	} else { /* ncpt > nnode */
> +		while ((nnode << 1) <= ncpt)
> +			nnode <<= 1;
> +	}
> +
> +	ncpt = nnode;
> +
> +out:
> +#if (BITS_PER_LONG == 32)
> +	/* config many CPU partitions on 32-bit system could consume
> +	 * too much memory
> +	 */
> +	ncpt = min(2U, ncpt);
> +#endif
> +	while (ncpu % ncpt)
> +		ncpt--; /* worst case is 1 */
> +
> +	return ncpt;
> +}
> +
> +static struct cfs_cpt_table *
> +cfs_cpt_table_create(int ncpt)
> +{
> +	struct cfs_cpt_table *cptab = NULL;
> +	cpumask_var_t mask;
> +	int cpt = 0;
> +	int num;
> +	int rc;
> +	int i;
> +
> +	rc = cfs_cpt_num_estimate();
> +	if (ncpt <= 0)
> +		ncpt = rc;
> +
> +	if (ncpt > num_online_cpus() || ncpt > 4 * rc) {
> +		CWARN("CPU partition number %d is larger than suggested value (%d), your system may have performance issue or run out of memory while under pressure\n",
> +		      ncpt, rc);
> +	}
> +
> +	if (num_online_cpus() % ncpt) {
> +		CERROR("CPU number %d is not multiple of cpu_npartition %d, please try different cpu_npartitions value or set pattern string by cpu_pattern=STRING\n",
> +		       (int)num_online_cpus(), ncpt);
> +		goto failed;
> +	}
> +
> +	cptab = cfs_cpt_table_alloc(ncpt);
> +	if (!cptab) {
> +		CERROR("Failed to allocate CPU map(%d)\n", ncpt);
> +		goto failed;
> +	}
> +
> +	num = num_online_cpus() / ncpt;
> +	if (!num) {
> +		CERROR("CPU changed while setting CPU partition\n");
> +		goto failed;
> +	}
> +
> +	if (!zalloc_cpumask_var(&mask, GFP_NOFS)) {
> +		CERROR("Failed to allocate scratch cpumask\n");
> +		goto failed;
> +	}
> +
> +	for_each_online_node(i) {
> +		cfs_node_to_cpumask(i, mask);
> +
> +		while (!cpumask_empty(mask)) {
> +			struct cfs_cpu_partition *part;
> +			int n;
> +
> +			/*
> +			 * Each emulated NUMA node has all allowed CPUs in
> +			 * the mask.
> +			 * End loop when all partitions have assigned CPUs.
> +			 */
> +			if (cpt == ncpt)
> +				break;
> +
> +			part = &cptab->ctb_parts[cpt];
> +
> +			n = num - cpumask_weight(part->cpt_cpumask);
> +			LASSERT(n > 0);
> +
> +			rc = cfs_cpt_choose_ncpus(cptab, cpt, mask, n);
> +			if (rc < 0)
> +				goto failed_mask;
> +
> +			LASSERT(num >= cpumask_weight(part->cpt_cpumask));
> +			if (num == cpumask_weight(part->cpt_cpumask))
> +				cpt++;
> +		}
> +	}
> +
> +	if (cpt != ncpt ||
> +	    num != cpumask_weight(cptab->ctb_parts[ncpt - 1].cpt_cpumask)) {
> +		CERROR("Expect %d(%d) CPU partitions but got %d(%d), CPU hotplug/unplug while setting?\n",
> +		       cptab->ctb_nparts, num, cpt,
> +		       cpumask_weight(cptab->ctb_parts[ncpt - 1].cpt_cpumask));
> +		goto failed_mask;
> +	}
> +
> +	free_cpumask_var(mask);
> +
> +	return cptab;
> +
> + failed_mask:
> +	free_cpumask_var(mask);
> + failed:
> +	CERROR("Failed to setup CPU-partition-table with %d CPU-partitions, online HW nodes: %d, HW cpus: %d.\n",
> +	       ncpt, num_online_nodes(), num_online_cpus());
> +
> +	if (cptab)
> +		cfs_cpt_table_free(cptab);
> +
> +	return NULL;
> +}
> +
> +static struct cfs_cpt_table *
> +cfs_cpt_table_create_pattern(char *pattern)
> +{
> +	struct cfs_cpt_table *cptab;
> +	char *str;
> +	int node = 0;
> +	int high;
> +	int ncpt = 0;
> +	int cpt;
> +	int rc;
> +	int c;
> +	int i;
> +
> +	str = strim(pattern);
> +	if (*str == 'n' || *str == 'N') {
> +		pattern = str + 1;
> +		if (*pattern != '\0') {
> +			node = 1;
> +		} else { /* shortcut to create CPT from NUMA & CPU topology */
> +			node = -1;
> +			ncpt = num_online_nodes();
> +		}
> +	}
> +
> +	if (!ncpt) { /* scanning bracket which is mark of partition */
> +		for (str = pattern;; str++, ncpt++) {
> +			str = strchr(str, '[');
> +			if (!str)
> +				break;
> +		}
> +	}
> +
> +	if (!ncpt ||
> +	    (node && ncpt > num_online_nodes()) ||
> +	    (!node && ncpt > num_online_cpus())) {
> +		CERROR("Invalid pattern %s, or too many partitions %d\n",
> +		       pattern, ncpt);
> +		return NULL;
> +	}
> +
> +	cptab = cfs_cpt_table_alloc(ncpt);
> +	if (!cptab) {
> +		CERROR("Failed to allocate cpu partition table\n");
> +		return NULL;
> +	}
> +
> +	if (node < 0) { /* shortcut to create CPT from NUMA & CPU topology */
> +		cpt = 0;
> +
> +		for_each_online_node(i) {
> +			if (cpt >= ncpt) {
> +				CERROR("CPU changed while setting CPU partition table, %d/%d\n",
> +				       cpt, ncpt);
> +				goto failed;
> +			}
> +
> +			rc = cfs_cpt_set_node(cptab, cpt++, i);
> +			if (!rc)
> +				goto failed;
> +		}
> +		return cptab;
> +	}
> +
> +	high = node ? MAX_NUMNODES - 1 : nr_cpu_ids - 1;
> +
> +	for (str = strim(pattern), c = 0;; c++) {
> +		struct cfs_range_expr *range;
> +		struct cfs_expr_list *el;
> +		char *bracket = strchr(str, '[');
> +		int n;
> +
> +		if (!bracket) {
> +			if (*str) {
> +				CERROR("Invalid pattern %s\n", str);
> +				goto failed;
> +			}
> +			if (c != ncpt) {
> +				CERROR("expect %d partitions but found %d\n",
> +				       ncpt, c);
> +				goto failed;
> +			}
> +			break;
> +		}
> +
> +		if (sscanf(str, "%d%n", &cpt, &n) < 1) {
> +			CERROR("Invalid cpu pattern %s\n", str);
> +			goto failed;
> +		}
> +
> +		if (cpt < 0 || cpt >= ncpt) {
> +			CERROR("Invalid partition id %d, total partitions %d\n",
> +			       cpt, ncpt);
> +			goto failed;
> +		}
> +
> +		if (cfs_cpt_weight(cptab, cpt)) {
> +			CERROR("Partition %d has already been set.\n", cpt);
> +			goto failed;
> +		}
> +
> +		str = strim(str + n);
> +		if (str != bracket) {
> +			CERROR("Invalid pattern %s\n", str);
> +			goto failed;
> +		}
> +
> +		bracket = strchr(str, ']');
> +		if (!bracket) {
> +			CERROR("missing right bracket for cpt %d, %s\n",
> +			       cpt, str);
> +			goto failed;
> +		}
> +
> +		if (cfs_expr_list_parse(str, (bracket - str) + 1,
> +					0, high, &el)) {
> +			CERROR("Can't parse number range: %s\n", str);
> +			goto failed;
> +		}
> +
> +		list_for_each_entry(range, &el->el_exprs, re_link) {
> +			for (i = range->re_lo; i <= range->re_hi; i++) {
> +				if ((i - range->re_lo) % range->re_stride)
> +					continue;
> +
> +				rc = node ? cfs_cpt_set_node(cptab, cpt, i) :
> +					    cfs_cpt_set_cpu(cptab, cpt, i);
> +				if (!rc) {
> +					cfs_expr_list_free(el);
> +					goto failed;
> +				}
> +			}
> +		}
> +
> +		cfs_expr_list_free(el);
> +
> +		if (!cfs_cpt_online(cptab, cpt)) {
> +			CERROR("No online CPU is found on partition %d\n", cpt);
> +			goto failed;
> +		}
> +
> +		str = strim(bracket + 1);
> +	}
> +
> +	return cptab;
> +
> + failed:
> +	cfs_cpt_table_free(cptab);
> +	return NULL;
> +}
> +
> +#ifdef CONFIG_HOTPLUG_CPU
> +static enum cpuhp_state lustre_cpu_online;
> +
> +static void cfs_cpu_incr_cpt_version(void)
> +{
> +	spin_lock(&cpt_data.cpt_lock);
> +	cpt_data.cpt_version++;
> +	spin_unlock(&cpt_data.cpt_lock);
> +}
> +
> +static int cfs_cpu_online(unsigned int cpu)
> +{
> +	cfs_cpu_incr_cpt_version();
> +	return 0;
> +}
> +
> +static int cfs_cpu_dead(unsigned int cpu)
> +{
> +	bool warn;
> +
> +	cfs_cpu_incr_cpt_version();
> +
> +	mutex_lock(&cpt_data.cpt_mutex);
> +	/* if all HTs in a core are offline, it may break affinity */
> +	cpumask_copy(cpt_data.cpt_cpumask, topology_sibling_cpumask(cpu));
> +	warn = cpumask_any_and(cpt_data.cpt_cpumask,
> +			       cpu_online_mask) >= nr_cpu_ids;
> +	mutex_unlock(&cpt_data.cpt_mutex);
> +	CDEBUG(warn ? D_WARNING : D_INFO,
> +	       "Lustre: can't support CPU plug-out well now, performance and stability could be impacted [CPU %u]\n",
> +	       cpu);
> +	return 0;
> +}
> +#endif
> +
> +void
> +cfs_cpu_fini(void)
> +{
> +	if (cfs_cpt_table)
> +		cfs_cpt_table_free(cfs_cpt_table);
> +
> +#ifdef CONFIG_HOTPLUG_CPU
> +	if (lustre_cpu_online > 0)
> +		cpuhp_remove_state_nocalls(lustre_cpu_online);
> +	cpuhp_remove_state_nocalls(CPUHP_LUSTRE_CFS_DEAD);
> +#endif
> +	free_cpumask_var(cpt_data.cpt_cpumask);
> +}
> +
> +int
> +cfs_cpu_init(void)
> +{
> +	int ret = 0;
> +
> +	LASSERT(!cfs_cpt_table);
> +
> +	memset(&cpt_data, 0, sizeof(cpt_data));
> +
> +	if (!zalloc_cpumask_var(&cpt_data.cpt_cpumask, GFP_NOFS)) {
> +		CERROR("Failed to allocate scratch buffer\n");
> +		return -1;
> +	}
> +
> +	spin_lock_init(&cpt_data.cpt_lock);
> +	mutex_init(&cpt_data.cpt_mutex);
> +
> +#ifdef CONFIG_HOTPLUG_CPU
> +	ret = cpuhp_setup_state_nocalls(CPUHP_LUSTRE_CFS_DEAD,
> +					"staging/lustre/cfe:dead", NULL,
> +					cfs_cpu_dead);
> +	if (ret < 0)
> +		goto failed;
> +	ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
> +					"staging/lustre/cfe:online",
> +					cfs_cpu_online, NULL);
> +	if (ret < 0)
> +		goto failed;
> +	lustre_cpu_online = ret;
> +#endif
> +	ret = -EINVAL;
> +
> +	if (*cpu_pattern) {
> +		char *cpu_pattern_dup = kstrdup(cpu_pattern, GFP_KERNEL);
> +
> +		if (!cpu_pattern_dup) {
> +			CERROR("Failed to duplicate cpu_pattern\n");
> +			goto failed;
> +		}
> +
> +		cfs_cpt_table = cfs_cpt_table_create_pattern(cpu_pattern_dup);
> +		kfree(cpu_pattern_dup);
> +		if (!cfs_cpt_table) {
> +			CERROR("Failed to create cptab from pattern %s\n",
> +			       cpu_pattern);
> +			goto failed;
> +		}
> +
> +	} else {
> +		cfs_cpt_table = cfs_cpt_table_create(cpu_npartitions);
> +		if (!cfs_cpt_table) {
> +			CERROR("Failed to create ptable with npartitions %d\n",
> +			       cpu_npartitions);
> +			goto failed;
> +		}
> +	}
> +
> +	spin_lock(&cpt_data.cpt_lock);
> +	if (cfs_cpt_table->ctb_version != cpt_data.cpt_version) {
> +		spin_unlock(&cpt_data.cpt_lock);
> +		CERROR("CPU hotplug/unplug during setup\n");
> +		goto failed;
> +	}
> +	spin_unlock(&cpt_data.cpt_lock);
> +
> +	LCONSOLE(0, "HW nodes: %d, HW CPU cores: %d, npartitions: %d\n",
> +		 num_online_nodes(), num_online_cpus(),
> +		 cfs_cpt_number(cfs_cpt_table));
> +	return 0;
> +
> + failed:
> +	cfs_cpu_fini();
> +	return ret;
> +}
> +
> +#else /* ! CONFIG_SMP */
> +
>  void
>  cfs_cpu_fini(void)
>  {
> diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
> deleted file mode 100644
> index 388521e4e354..000000000000
> --- a/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
> +++ /dev/null
> @@ -1,1079 +0,0 @@
> -// SPDX-License-Identifier: GPL-2.0
> -/*
> - * GPL HEADER START
> - *
> - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
> - *
> - * This program is free software; you can redistribute it and/or modify
> - * it under the terms of the GNU General Public License version 2 only,
> - * as published by the Free Software Foundation.
> - *
> - * This program is distributed in the hope that it will be useful, but
> - * WITHOUT ANY WARRANTY; without even the implied warranty of
> - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> - * General Public License version 2 for more details (a copy is included
> - * in the LICENSE file that accompanied this code).
> - *
> - * GPL HEADER END
> - */
> -/*
> - * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
> - *
> - * Copyright (c) 2012, 2015 Intel Corporation.
> - */
> -/*
> - * This file is part of Lustre, http://www.lustre.org/
> - * Lustre is a trademark of Sun Microsystems, Inc.
> - *
> - * Author: liang@...mcloud.com
> - */
> -
> -#define DEBUG_SUBSYSTEM S_LNET
> -
> -#include <linux/cpu.h>
> -#include <linux/sched.h>
> -#include <linux/libcfs/libcfs.h>
> -
> -#ifdef CONFIG_SMP
> -
> -/**
> - * modparam for setting number of partitions
> - *
> - *  0 : estimate best value based on cores or NUMA nodes
> - *  1 : disable multiple partitions
> - * >1 : specify number of partitions
> - */
> -static int	cpu_npartitions;
> -module_param(cpu_npartitions, int, 0444);
> -MODULE_PARM_DESC(cpu_npartitions, "# of CPU partitions");
> -
> -/**
> - * modparam for setting CPU partitions patterns:
> - *
> - * i.e: "0[0,1,2,3] 1[4,5,6,7]", number before bracket is CPU partition ID,
> - *      number in bracket is processor ID (core or HT)
> - *
> - * i.e: "N 0[0,1] 1[2,3]" the first character 'N' means numbers in bracket
> - *       are NUMA node ID, number before bracket is CPU partition ID.
> - *
> - * i.e: "N", shortcut expression to create CPT from NUMA & CPU topology
> - *
> - * NB: If user specified cpu_pattern, cpu_npartitions will be ignored
> - */
> -static char	*cpu_pattern = "N";
> -module_param(cpu_pattern, charp, 0444);
> -MODULE_PARM_DESC(cpu_pattern, "CPU partitions pattern");
> -
> -struct cfs_cpt_data {
> -	/* serialize hotplug etc */
> -	spinlock_t		cpt_lock;
> -	/* reserved for hotplug */
> -	unsigned long		cpt_version;
> -	/* mutex to protect cpt_cpumask */
> -	struct mutex		cpt_mutex;
> -	/* scratch buffer for set/unset_node */
> -	cpumask_var_t		cpt_cpumask;
> -};
> -
> -static struct cfs_cpt_data	cpt_data;
> -
> -static void
> -cfs_node_to_cpumask(int node, cpumask_t *mask)
> -{
> -	const cpumask_t *tmp = cpumask_of_node(node);
> -
> -	if (tmp)
> -		cpumask_copy(mask, tmp);
> -	else
> -		cpumask_clear(mask);
> -}
> -
> -void
> -cfs_cpt_table_free(struct cfs_cpt_table *cptab)
> -{
> -	int i;
> -
> -	kvfree(cptab->ctb_cpu2cpt);
> -
> -	for (i = 0; cptab->ctb_parts && i < cptab->ctb_nparts; i++) {
> -		struct cfs_cpu_partition *part = &cptab->ctb_parts[i];
> -
> -		kfree(part->cpt_nodemask);
> -		free_cpumask_var(part->cpt_cpumask);
> -	}
> -
> -	kvfree(cptab->ctb_parts);
> -
> -	kfree(cptab->ctb_nodemask);
> -	free_cpumask_var(cptab->ctb_cpumask);
> -
> -	kfree(cptab);
> -}
> -EXPORT_SYMBOL(cfs_cpt_table_free);
> -
> -struct cfs_cpt_table *
> -cfs_cpt_table_alloc(unsigned int ncpt)
> -{
> -	struct cfs_cpt_table *cptab;
> -	int i;
> -
> -	cptab = kzalloc(sizeof(*cptab), GFP_NOFS);
> -	if (!cptab)
> -		return NULL;
> -
> -	cptab->ctb_nparts = ncpt;
> -
> -	cptab->ctb_nodemask = kzalloc(sizeof(*cptab->ctb_nodemask),
> -				      GFP_NOFS);
> -	if (!zalloc_cpumask_var(&cptab->ctb_cpumask, GFP_NOFS) ||
> -	    !cptab->ctb_nodemask)
> -		goto failed;
> -
> -	cptab->ctb_cpu2cpt = kvmalloc_array(num_possible_cpus(),
> -					    sizeof(cptab->ctb_cpu2cpt[0]),
> -					    GFP_KERNEL);
> -	if (!cptab->ctb_cpu2cpt)
> -		goto failed;
> -
> -	memset(cptab->ctb_cpu2cpt, -1,
> -	       num_possible_cpus() * sizeof(cptab->ctb_cpu2cpt[0]));
> -
> -	cptab->ctb_parts = kvmalloc_array(ncpt, sizeof(cptab->ctb_parts[0]),
> -					  GFP_KERNEL);
> -	if (!cptab->ctb_parts)
> -		goto failed;
> -
> -	for (i = 0; i < ncpt; i++) {
> -		struct cfs_cpu_partition *part = &cptab->ctb_parts[i];
> -
> -		part->cpt_nodemask = kzalloc(sizeof(*part->cpt_nodemask),
> -					     GFP_NOFS);
> -		if (!zalloc_cpumask_var(&part->cpt_cpumask, GFP_NOFS) ||
> -		    !part->cpt_nodemask)
> -			goto failed;
> -	}
> -
> -	spin_lock(&cpt_data.cpt_lock);
> -	/* Reserved for hotplug */
> -	cptab->ctb_version = cpt_data.cpt_version;
> -	spin_unlock(&cpt_data.cpt_lock);
> -
> -	return cptab;
> -
> - failed:
> -	cfs_cpt_table_free(cptab);
> -	return NULL;
> -}
> -EXPORT_SYMBOL(cfs_cpt_table_alloc);
> -
> -int
> -cfs_cpt_table_print(struct cfs_cpt_table *cptab, char *buf, int len)
> -{
> -	char *tmp = buf;
> -	int rc = 0;
> -	int i;
> -	int j;
> -
> -	for (i = 0; i < cptab->ctb_nparts; i++) {
> -		if (len > 0) {
> -			rc = snprintf(tmp, len, "%d\t: ", i);
> -			len -= rc;
> -		}
> -
> -		if (len <= 0) {
> -			rc = -EFBIG;
> -			goto out;
> -		}
> -
> -		tmp += rc;
> -		for_each_cpu(j, cptab->ctb_parts[i].cpt_cpumask) {
> -			rc = snprintf(tmp, len, "%d ", j);
> -			len -= rc;
> -			if (len <= 0) {
> -				rc = -EFBIG;
> -				goto out;
> -			}
> -			tmp += rc;
> -		}
> -
> -		*tmp = '\n';
> -		tmp++;
> -		len--;
> -	}
> -
> - out:
> -	if (rc < 0)
> -		return rc;
> -
> -	return tmp - buf;
> -}
> -EXPORT_SYMBOL(cfs_cpt_table_print);
> -
> -int
> -cfs_cpt_number(struct cfs_cpt_table *cptab)
> -{
> -	return cptab->ctb_nparts;
> -}
> -EXPORT_SYMBOL(cfs_cpt_number);
> -
> -int
> -cfs_cpt_weight(struct cfs_cpt_table *cptab, int cpt)
> -{
> -	LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
> -
> -	return cpt == CFS_CPT_ANY ?
> -	       cpumask_weight(cptab->ctb_cpumask) :
> -	       cpumask_weight(cptab->ctb_parts[cpt].cpt_cpumask);
> -}
> -EXPORT_SYMBOL(cfs_cpt_weight);
> -
> -int
> -cfs_cpt_online(struct cfs_cpt_table *cptab, int cpt)
> -{
> -	LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
> -
> -	return cpt == CFS_CPT_ANY ?
> -	       cpumask_any_and(cptab->ctb_cpumask,
> -			       cpu_online_mask) < nr_cpu_ids :
> -	       cpumask_any_and(cptab->ctb_parts[cpt].cpt_cpumask,
> -			       cpu_online_mask) < nr_cpu_ids;
> -}
> -EXPORT_SYMBOL(cfs_cpt_online);
> -
> -cpumask_var_t *
> -cfs_cpt_cpumask(struct cfs_cpt_table *cptab, int cpt)
> -{
> -	LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
> -
> -	return cpt == CFS_CPT_ANY ?
> -	       &cptab->ctb_cpumask : &cptab->ctb_parts[cpt].cpt_cpumask;
> -}
> -EXPORT_SYMBOL(cfs_cpt_cpumask);
> -
> -nodemask_t *
> -cfs_cpt_nodemask(struct cfs_cpt_table *cptab, int cpt)
> -{
> -	LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
> -
> -	return cpt == CFS_CPT_ANY ?
> -	       cptab->ctb_nodemask : cptab->ctb_parts[cpt].cpt_nodemask;
> -}
> -EXPORT_SYMBOL(cfs_cpt_nodemask);
> -
> -int
> -cfs_cpt_set_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu)
> -{
> -	int node;
> -
> -	LASSERT(cpt >= 0 && cpt < cptab->ctb_nparts);
> -
> -	if (cpu < 0 || cpu >= nr_cpu_ids || !cpu_online(cpu)) {
> -		CDEBUG(D_INFO, "CPU %d is invalid or it's offline\n", cpu);
> -		return 0;
> -	}
> -
> -	if (cptab->ctb_cpu2cpt[cpu] != -1) {
> -		CDEBUG(D_INFO, "CPU %d is already in partition %d\n",
> -		       cpu, cptab->ctb_cpu2cpt[cpu]);
> -		return 0;
> -	}
> -
> -	cptab->ctb_cpu2cpt[cpu] = cpt;
> -
> -	LASSERT(!cpumask_test_cpu(cpu, cptab->ctb_cpumask));
> -	LASSERT(!cpumask_test_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask));
> -
> -	cpumask_set_cpu(cpu, cptab->ctb_cpumask);
> -	cpumask_set_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask);
> -
> -	node = cpu_to_node(cpu);
> -
> -	/* first CPU of @node in this CPT table */
> -	if (!node_isset(node, *cptab->ctb_nodemask))
> -		node_set(node, *cptab->ctb_nodemask);
> -
> -	/* first CPU of @node in this partition */
> -	if (!node_isset(node, *cptab->ctb_parts[cpt].cpt_nodemask))
> -		node_set(node, *cptab->ctb_parts[cpt].cpt_nodemask);
> -
> -	return 1;
> -}
> -EXPORT_SYMBOL(cfs_cpt_set_cpu);
> -
> -void
> -cfs_cpt_unset_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu)
> -{
> -	int node;
> -	int i;
> -
> -	LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
> -
> -	if (cpu < 0 || cpu >= nr_cpu_ids) {
> -		CDEBUG(D_INFO, "Invalid CPU id %d\n", cpu);
> -		return;
> -	}
> -
> -	if (cpt == CFS_CPT_ANY) {
> -		/* caller doesn't know the partition ID */
> -		cpt = cptab->ctb_cpu2cpt[cpu];
> -		if (cpt < 0) { /* not set in this CPT-table */
> -			CDEBUG(D_INFO, "Try to unset cpu %d which is not in CPT-table %p\n",
> -			       cpt, cptab);
> -			return;
> -		}
> -
> -	} else if (cpt != cptab->ctb_cpu2cpt[cpu]) {
> -		CDEBUG(D_INFO,
> -		       "CPU %d is not in cpu-partition %d\n", cpu, cpt);
> -		return;
> -	}
> -
> -	LASSERT(cpumask_test_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask));
> -	LASSERT(cpumask_test_cpu(cpu, cptab->ctb_cpumask));
> -
> -	cpumask_clear_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask);
> -	cpumask_clear_cpu(cpu, cptab->ctb_cpumask);
> -	cptab->ctb_cpu2cpt[cpu] = -1;
> -
> -	node = cpu_to_node(cpu);
> -
> -	LASSERT(node_isset(node, *cptab->ctb_parts[cpt].cpt_nodemask));
> -	LASSERT(node_isset(node, *cptab->ctb_nodemask));
> -
> -	for_each_cpu(i, cptab->ctb_parts[cpt].cpt_cpumask) {
> -		/* this CPT has other CPU belonging to this node? */
> -		if (cpu_to_node(i) == node)
> -			break;
> -	}
> -
> -	if (i >= nr_cpu_ids)
> -		node_clear(node, *cptab->ctb_parts[cpt].cpt_nodemask);
> -
> -	for_each_cpu(i, cptab->ctb_cpumask) {
> -		/* this CPT-table has other CPU belonging to this node? */
> -		if (cpu_to_node(i) == node)
> -			break;
> -	}
> -
> -	if (i >= nr_cpu_ids)
> -		node_clear(node, *cptab->ctb_nodemask);
> -}
> -EXPORT_SYMBOL(cfs_cpt_unset_cpu);
> -
> -int
> -cfs_cpt_set_cpumask(struct cfs_cpt_table *cptab, int cpt, cpumask_t *mask)
> -{
> -	int i;
> -
> -	if (!cpumask_weight(mask) ||
> -	    cpumask_any_and(mask, cpu_online_mask) >= nr_cpu_ids) {
> -		CDEBUG(D_INFO, "No online CPU is found in the CPU mask for CPU partition %d\n",
> -		       cpt);
> -		return 0;
> -	}
> -
> -	for_each_cpu(i, mask) {
> -		if (!cfs_cpt_set_cpu(cptab, cpt, i))
> -			return 0;
> -	}
> -
> -	return 1;
> -}
> -EXPORT_SYMBOL(cfs_cpt_set_cpumask);
> -
> -void
> -cfs_cpt_unset_cpumask(struct cfs_cpt_table *cptab, int cpt, cpumask_t *mask)
> -{
> -	int i;
> -
> -	for_each_cpu(i, mask)
> -		cfs_cpt_unset_cpu(cptab, cpt, i);
> -}
> -EXPORT_SYMBOL(cfs_cpt_unset_cpumask);
> -
> -int
> -cfs_cpt_set_node(struct cfs_cpt_table *cptab, int cpt, int node)
> -{
> -	int rc;
> -
> -	if (node < 0 || node >= MAX_NUMNODES) {
> -		CDEBUG(D_INFO,
> -		       "Invalid NUMA id %d for CPU partition %d\n", node, cpt);
> -		return 0;
> -	}
> -
> -	mutex_lock(&cpt_data.cpt_mutex);
> -
> -	cfs_node_to_cpumask(node, cpt_data.cpt_cpumask);
> -
> -	rc = cfs_cpt_set_cpumask(cptab, cpt, cpt_data.cpt_cpumask);
> -
> -	mutex_unlock(&cpt_data.cpt_mutex);
> -
> -	return rc;
> -}
> -EXPORT_SYMBOL(cfs_cpt_set_node);
> -
> -void
> -cfs_cpt_unset_node(struct cfs_cpt_table *cptab, int cpt, int node)
> -{
> -	if (node < 0 || node >= MAX_NUMNODES) {
> -		CDEBUG(D_INFO,
> -		       "Invalid NUMA id %d for CPU partition %d\n", node, cpt);
> -		return;
> -	}
> -
> -	mutex_lock(&cpt_data.cpt_mutex);
> -
> -	cfs_node_to_cpumask(node, cpt_data.cpt_cpumask);
> -
> -	cfs_cpt_unset_cpumask(cptab, cpt, cpt_data.cpt_cpumask);
> -
> -	mutex_unlock(&cpt_data.cpt_mutex);
> -}
> -EXPORT_SYMBOL(cfs_cpt_unset_node);
> -
> -int
> -cfs_cpt_set_nodemask(struct cfs_cpt_table *cptab, int cpt, nodemask_t *mask)
> -{
> -	int i;
> -
> -	for_each_node_mask(i, *mask) {
> -		if (!cfs_cpt_set_node(cptab, cpt, i))
> -			return 0;
> -	}
> -
> -	return 1;
> -}
> -EXPORT_SYMBOL(cfs_cpt_set_nodemask);
> -
> -void
> -cfs_cpt_unset_nodemask(struct cfs_cpt_table *cptab, int cpt, nodemask_t *mask)
> -{
> -	int i;
> -
> -	for_each_node_mask(i, *mask)
> -		cfs_cpt_unset_node(cptab, cpt, i);
> -}
> -EXPORT_SYMBOL(cfs_cpt_unset_nodemask);
> -
> -void
> -cfs_cpt_clear(struct cfs_cpt_table *cptab, int cpt)
> -{
> -	int last;
> -	int i;
> -
> -	if (cpt == CFS_CPT_ANY) {
> -		last = cptab->ctb_nparts - 1;
> -		cpt = 0;
> -	} else {
> -		last = cpt;
> -	}
> -
> -	for (; cpt <= last; cpt++) {
> -		for_each_cpu(i, cptab->ctb_parts[cpt].cpt_cpumask)
> -			cfs_cpt_unset_cpu(cptab, cpt, i);
> -	}
> -}
> -EXPORT_SYMBOL(cfs_cpt_clear);
> -
> -int
> -cfs_cpt_spread_node(struct cfs_cpt_table *cptab, int cpt)
> -{
> -	nodemask_t *mask;
> -	int weight;
> -	int rotor;
> -	int node;
> -
> -	/* convert CPU partition ID to HW node id */
> -
> -	if (cpt < 0 || cpt >= cptab->ctb_nparts) {
> -		mask = cptab->ctb_nodemask;
> -		rotor = cptab->ctb_spread_rotor++;
> -	} else {
> -		mask = cptab->ctb_parts[cpt].cpt_nodemask;
> -		rotor = cptab->ctb_parts[cpt].cpt_spread_rotor++;
> -	}
> -
> -	weight = nodes_weight(*mask);
> -	LASSERT(weight > 0);
> -
> -	rotor %= weight;
> -
> -	for_each_node_mask(node, *mask) {
> -		if (!rotor--)
> -			return node;
> -	}
> -
> -	LBUG();
> -	return 0;
> -}
> -EXPORT_SYMBOL(cfs_cpt_spread_node);
> -
> -int
> -cfs_cpt_current(struct cfs_cpt_table *cptab, int remap)
> -{
> -	int cpu;
> -	int cpt;
> -
> -	preempt_disable();
> -	cpu = smp_processor_id();
> -	cpt = cptab->ctb_cpu2cpt[cpu];
> -
> -	if (cpt < 0 && remap) {
> -		/* don't return negative value for safety of upper layer,
> -		 * instead we shadow the unknown cpu to a valid partition ID
> -		 */
> -		cpt = cpu % cptab->ctb_nparts;
> -	}
> -	preempt_enable();
> -	return cpt;
> -}
> -EXPORT_SYMBOL(cfs_cpt_current);
> -
> -int
> -cfs_cpt_of_cpu(struct cfs_cpt_table *cptab, int cpu)
> -{
> -	LASSERT(cpu >= 0 && cpu < nr_cpu_ids);
> -
> -	return cptab->ctb_cpu2cpt[cpu];
> -}
> -EXPORT_SYMBOL(cfs_cpt_of_cpu);
> -
> -int
> -cfs_cpt_bind(struct cfs_cpt_table *cptab, int cpt)
> -{
> -	cpumask_var_t *cpumask;
> -	nodemask_t *nodemask;
> -	int rc;
> -	int i;
> -
> -	LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
> -
> -	if (cpt == CFS_CPT_ANY) {
> -		cpumask = &cptab->ctb_cpumask;
> -		nodemask = cptab->ctb_nodemask;
> -	} else {
> -		cpumask = &cptab->ctb_parts[cpt].cpt_cpumask;
> -		nodemask = cptab->ctb_parts[cpt].cpt_nodemask;
> -	}
> -
> -	if (cpumask_any_and(*cpumask, cpu_online_mask) >= nr_cpu_ids) {
> -		CERROR("No online CPU found in CPU partition %d, did someone do CPU hotplug on system? You might need to reload Lustre modules to keep system working well.\n",
> -		       cpt);
> -		return -EINVAL;
> -	}
> -
> -	for_each_online_cpu(i) {
> -		if (cpumask_test_cpu(i, *cpumask))
> -			continue;
> -
> -		rc = set_cpus_allowed_ptr(current, *cpumask);
> -		set_mems_allowed(*nodemask);
> -		if (!rc)
> -			schedule(); /* switch to allowed CPU */
> -
> -		return rc;
> -	}
> -
> -	/* don't need to set affinity because all online CPUs are covered */
> -	return 0;
> -}
> -EXPORT_SYMBOL(cfs_cpt_bind);
> -
> -/**
> - * Choose max to \a number CPUs from \a node and set them in \a cpt.
> - * We always prefer to choose CPU in the same core/socket.
> - */
> -static int
> -cfs_cpt_choose_ncpus(struct cfs_cpt_table *cptab, int cpt,
> -		     cpumask_t *node, int number)
> -{
> -	cpumask_var_t socket;
> -	cpumask_var_t core;
> -	int rc = 0;
> -	int cpu;
> -
> -	LASSERT(number > 0);
> -
> -	if (number >= cpumask_weight(node)) {
> -		while (!cpumask_empty(node)) {
> -			cpu = cpumask_first(node);
> -
> -			rc = cfs_cpt_set_cpu(cptab, cpt, cpu);
> -			if (!rc)
> -				return -EINVAL;
> -			cpumask_clear_cpu(cpu, node);
> -		}
> -		return 0;
> -	}
> -
> -	/*
> -	 * Allocate scratch buffers
> -	 * As we cannot initialize a cpumask_var_t, we need
> -	 * to alloc both before we can risk trying to free either
> -	 */
> -	if (!zalloc_cpumask_var(&socket, GFP_NOFS))
> -		rc = -ENOMEM;
> -	if (!zalloc_cpumask_var(&core, GFP_NOFS))
> -		rc = -ENOMEM;
> -	if (rc)
> -		goto out;
> -
> -	while (!cpumask_empty(node)) {
> -		cpu = cpumask_first(node);
> -
> -		/* get cpumask for cores in the same socket */
> -		cpumask_copy(socket, topology_core_cpumask(cpu));
> -		cpumask_and(socket, socket, node);
> -
> -		LASSERT(!cpumask_empty(socket));
> -
> -		while (!cpumask_empty(socket)) {
> -			int i;
> -
> -			/* get cpumask for hts in the same core */
> -			cpumask_copy(core, topology_sibling_cpumask(cpu));
> -			cpumask_and(core, core, node);
> -
> -			LASSERT(!cpumask_empty(core));
> -
> -			for_each_cpu(i, core) {
> -				cpumask_clear_cpu(i, socket);
> -				cpumask_clear_cpu(i, node);
> -
> -				rc = cfs_cpt_set_cpu(cptab, cpt, i);
> -				if (!rc) {
> -					rc = -EINVAL;
> -					goto out;
> -				}
> -
> -				if (!--number)
> -					goto out;
> -			}
> -			cpu = cpumask_first(socket);
> -		}
> -	}
> -
> -out:
> -	free_cpumask_var(socket);
> -	free_cpumask_var(core);
> -	return rc;
> -}
> -
> -#define CPT_WEIGHT_MIN  4u
> -
> -static unsigned int
> -cfs_cpt_num_estimate(void)
> -{
> -	unsigned int nnode = num_online_nodes();
> -	unsigned int ncpu = num_online_cpus();
> -	unsigned int ncpt;
> -
> -	if (ncpu <= CPT_WEIGHT_MIN) {
> -		ncpt = 1;
> -		goto out;
> -	}
> -
> -	/* generate reasonable number of CPU partitions based on total number
> -	 * of CPUs, Preferred N should be power2 and match this condition:
> -	 * 2 * (N - 1)^2 < NCPUS <= 2 * N^2
> -	 */
> -	for (ncpt = 2; ncpu > 2 * ncpt * ncpt; ncpt <<= 1)
> -		;
> -
> -	if (ncpt <= nnode) { /* fat numa system */
> -		while (nnode > ncpt)
> -			nnode >>= 1;
> -
> -	} else { /* ncpt > nnode */
> -		while ((nnode << 1) <= ncpt)
> -			nnode <<= 1;
> -	}
> -
> -	ncpt = nnode;
> -
> -out:
> -#if (BITS_PER_LONG == 32)
> -	/* config many CPU partitions on 32-bit system could consume
> -	 * too much memory
> -	 */
> -	ncpt = min(2U, ncpt);
> -#endif
> -	while (ncpu % ncpt)
> -		ncpt--; /* worst case is 1 */
> -
> -	return ncpt;
> -}
> -
> -static struct cfs_cpt_table *
> -cfs_cpt_table_create(int ncpt)
> -{
> -	struct cfs_cpt_table *cptab = NULL;
> -	cpumask_var_t mask;
> -	int cpt = 0;
> -	int num;
> -	int rc;
> -	int i;
> -
> -	rc = cfs_cpt_num_estimate();
> -	if (ncpt <= 0)
> -		ncpt = rc;
> -
> -	if (ncpt > num_online_cpus() || ncpt > 4 * rc) {
> -		CWARN("CPU partition number %d is larger than suggested value (%d), your system may have performance issue or run out of memory while under pressure\n",
> -		      ncpt, rc);
> -	}
> -
> -	if (num_online_cpus() % ncpt) {
> -		CERROR("CPU number %d is not multiple of cpu_npartition %d, please try different cpu_npartitions value or set pattern string by cpu_pattern=STRING\n",
> -		       (int)num_online_cpus(), ncpt);
> -		goto failed;
> -	}
> -
> -	cptab = cfs_cpt_table_alloc(ncpt);
> -	if (!cptab) {
> -		CERROR("Failed to allocate CPU map(%d)\n", ncpt);
> -		goto failed;
> -	}
> -
> -	num = num_online_cpus() / ncpt;
> -	if (!num) {
> -		CERROR("CPU changed while setting CPU partition\n");
> -		goto failed;
> -	}
> -
> -	if (!zalloc_cpumask_var(&mask, GFP_NOFS)) {
> -		CERROR("Failed to allocate scratch cpumask\n");
> -		goto failed;
> -	}
> -
> -	for_each_online_node(i) {
> -		cfs_node_to_cpumask(i, mask);
> -
> -		while (!cpumask_empty(mask)) {
> -			struct cfs_cpu_partition *part;
> -			int n;
> -
> -			/*
> -			 * Each emulated NUMA node has all allowed CPUs in
> -			 * the mask.
> -			 * End loop when all partitions have assigned CPUs.
> -			 */
> -			if (cpt == ncpt)
> -				break;
> -
> -			part = &cptab->ctb_parts[cpt];
> -
> -			n = num - cpumask_weight(part->cpt_cpumask);
> -			LASSERT(n > 0);
> -
> -			rc = cfs_cpt_choose_ncpus(cptab, cpt, mask, n);
> -			if (rc < 0)
> -				goto failed_mask;
> -
> -			LASSERT(num >= cpumask_weight(part->cpt_cpumask));
> -			if (num == cpumask_weight(part->cpt_cpumask))
> -				cpt++;
> -		}
> -	}
> -
> -	if (cpt != ncpt ||
> -	    num != cpumask_weight(cptab->ctb_parts[ncpt - 1].cpt_cpumask)) {
> -		CERROR("Expect %d(%d) CPU partitions but got %d(%d), CPU hotplug/unplug while setting?\n",
> -		       cptab->ctb_nparts, num, cpt,
> -		       cpumask_weight(cptab->ctb_parts[ncpt - 1].cpt_cpumask));
> -		goto failed_mask;
> -	}
> -
> -	free_cpumask_var(mask);
> -
> -	return cptab;
> -
> - failed_mask:
> -	free_cpumask_var(mask);
> - failed:
> -	CERROR("Failed to setup CPU-partition-table with %d CPU-partitions, online HW nodes: %d, HW cpus: %d.\n",
> -	       ncpt, num_online_nodes(), num_online_cpus());
> -
> -	if (cptab)
> -		cfs_cpt_table_free(cptab);
> -
> -	return NULL;
> -}
> -
> -static struct cfs_cpt_table *
> -cfs_cpt_table_create_pattern(char *pattern)
> -{
> -	struct cfs_cpt_table *cptab;
> -	char *str;
> -	int node = 0;
> -	int high;
> -	int ncpt = 0;
> -	int cpt;
> -	int rc;
> -	int c;
> -	int i;
> -
> -	str = strim(pattern);
> -	if (*str == 'n' || *str == 'N') {
> -		pattern = str + 1;
> -		if (*pattern != '\0') {
> -			node = 1;
> -		} else { /* shortcut to create CPT from NUMA & CPU topology */
> -			node = -1;
> -			ncpt = num_online_nodes();
> -		}
> -	}
> -
> -	if (!ncpt) { /* scanning bracket which is mark of partition */
> -		for (str = pattern;; str++, ncpt++) {
> -			str = strchr(str, '[');
> -			if (!str)
> -				break;
> -		}
> -	}
> -
> -	if (!ncpt ||
> -	    (node && ncpt > num_online_nodes()) ||
> -	    (!node && ncpt > num_online_cpus())) {
> -		CERROR("Invalid pattern %s, or too many partitions %d\n",
> -		       pattern, ncpt);
> -		return NULL;
> -	}
> -
> -	cptab = cfs_cpt_table_alloc(ncpt);
> -	if (!cptab) {
> -		CERROR("Failed to allocate cpu partition table\n");
> -		return NULL;
> -	}
> -
> -	if (node < 0) { /* shortcut to create CPT from NUMA & CPU topology */
> -		cpt = 0;
> -
> -		for_each_online_node(i) {
> -			if (cpt >= ncpt) {
> -				CERROR("CPU changed while setting CPU partition table, %d/%d\n",
> -				       cpt, ncpt);
> -				goto failed;
> -			}
> -
> -			rc = cfs_cpt_set_node(cptab, cpt++, i);
> -			if (!rc)
> -				goto failed;
> -		}
> -		return cptab;
> -	}
> -
> -	high = node ? MAX_NUMNODES - 1 : nr_cpu_ids - 1;
> -
> -	for (str = strim(pattern), c = 0;; c++) {
> -		struct cfs_range_expr *range;
> -		struct cfs_expr_list *el;
> -		char *bracket = strchr(str, '[');
> -		int n;
> -
> -		if (!bracket) {
> -			if (*str) {
> -				CERROR("Invalid pattern %s\n", str);
> -				goto failed;
> -			}
> -			if (c != ncpt) {
> -				CERROR("expect %d partitions but found %d\n",
> -				       ncpt, c);
> -				goto failed;
> -			}
> -			break;
> -		}
> -
> -		if (sscanf(str, "%d%n", &cpt, &n) < 1) {
> -			CERROR("Invalid cpu pattern %s\n", str);
> -			goto failed;
> -		}
> -
> -		if (cpt < 0 || cpt >= ncpt) {
> -			CERROR("Invalid partition id %d, total partitions %d\n",
> -			       cpt, ncpt);
> -			goto failed;
> -		}
> -
> -		if (cfs_cpt_weight(cptab, cpt)) {
> -			CERROR("Partition %d has already been set.\n", cpt);
> -			goto failed;
> -		}
> -
> -		str = strim(str + n);
> -		if (str != bracket) {
> -			CERROR("Invalid pattern %s\n", str);
> -			goto failed;
> -		}
> -
> -		bracket = strchr(str, ']');
> -		if (!bracket) {
> -			CERROR("missing right bracket for cpt %d, %s\n",
> -			       cpt, str);
> -			goto failed;
> -		}
> -
> -		if (cfs_expr_list_parse(str, (bracket - str) + 1,
> -					0, high, &el)) {
> -			CERROR("Can't parse number range: %s\n", str);
> -			goto failed;
> -		}
> -
> -		list_for_each_entry(range, &el->el_exprs, re_link) {
> -			for (i = range->re_lo; i <= range->re_hi; i++) {
> -				if ((i - range->re_lo) % range->re_stride)
> -					continue;
> -
> -				rc = node ? cfs_cpt_set_node(cptab, cpt, i) :
> -					    cfs_cpt_set_cpu(cptab, cpt, i);
> -				if (!rc) {
> -					cfs_expr_list_free(el);
> -					goto failed;
> -				}
> -			}
> -		}
> -
> -		cfs_expr_list_free(el);
> -
> -		if (!cfs_cpt_online(cptab, cpt)) {
> -			CERROR("No online CPU is found on partition %d\n", cpt);
> -			goto failed;
> -		}
> -
> -		str = strim(bracket + 1);
> -	}
> -
> -	return cptab;
> -
> - failed:
> -	cfs_cpt_table_free(cptab);
> -	return NULL;
> -}
> -
> -#ifdef CONFIG_HOTPLUG_CPU
> -static enum cpuhp_state lustre_cpu_online;
> -
> -static void cfs_cpu_incr_cpt_version(void)
> -{
> -	spin_lock(&cpt_data.cpt_lock);
> -	cpt_data.cpt_version++;
> -	spin_unlock(&cpt_data.cpt_lock);
> -}
> -
> -static int cfs_cpu_online(unsigned int cpu)
> -{
> -	cfs_cpu_incr_cpt_version();
> -	return 0;
> -}
> -
> -static int cfs_cpu_dead(unsigned int cpu)
> -{
> -	bool warn;
> -
> -	cfs_cpu_incr_cpt_version();
> -
> -	mutex_lock(&cpt_data.cpt_mutex);
> -	/* if all HTs in a core are offline, it may break affinity */
> -	cpumask_copy(cpt_data.cpt_cpumask, topology_sibling_cpumask(cpu));
> -	warn = cpumask_any_and(cpt_data.cpt_cpumask,
> -			       cpu_online_mask) >= nr_cpu_ids;
> -	mutex_unlock(&cpt_data.cpt_mutex);
> -	CDEBUG(warn ? D_WARNING : D_INFO,
> -	       "Lustre: can't support CPU plug-out well now, performance and stability could be impacted [CPU %u]\n",
> -	       cpu);
> -	return 0;
> -}
> -#endif
> -
> -void
> -cfs_cpu_fini(void)
> -{
> -	if (cfs_cpt_table)
> -		cfs_cpt_table_free(cfs_cpt_table);
> -
> -#ifdef CONFIG_HOTPLUG_CPU
> -	if (lustre_cpu_online > 0)
> -		cpuhp_remove_state_nocalls(lustre_cpu_online);
> -	cpuhp_remove_state_nocalls(CPUHP_LUSTRE_CFS_DEAD);
> -#endif
> -	free_cpumask_var(cpt_data.cpt_cpumask);
> -}
> -
> -int
> -cfs_cpu_init(void)
> -{
> -	int ret = 0;
> -
> -	LASSERT(!cfs_cpt_table);
> -
> -	memset(&cpt_data, 0, sizeof(cpt_data));
> -
> -	if (!zalloc_cpumask_var(&cpt_data.cpt_cpumask, GFP_NOFS)) {
> -		CERROR("Failed to allocate scratch buffer\n");
> -		return -1;
> -	}
> -
> -	spin_lock_init(&cpt_data.cpt_lock);
> -	mutex_init(&cpt_data.cpt_mutex);
> -
> -#ifdef CONFIG_HOTPLUG_CPU
> -	ret = cpuhp_setup_state_nocalls(CPUHP_LUSTRE_CFS_DEAD,
> -					"staging/lustre/cfe:dead", NULL,
> -					cfs_cpu_dead);
> -	if (ret < 0)
> -		goto failed;
> -	ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
> -					"staging/lustre/cfe:online",
> -					cfs_cpu_online, NULL);
> -	if (ret < 0)
> -		goto failed;
> -	lustre_cpu_online = ret;
> -#endif
> -	ret = -EINVAL;
> -
> -	if (*cpu_pattern) {
> -		char *cpu_pattern_dup = kstrdup(cpu_pattern, GFP_KERNEL);
> -
> -		if (!cpu_pattern_dup) {
> -			CERROR("Failed to duplicate cpu_pattern\n");
> -			goto failed;
> -		}
> -
> -		cfs_cpt_table = cfs_cpt_table_create_pattern(cpu_pattern_dup);
> -		kfree(cpu_pattern_dup);
> -		if (!cfs_cpt_table) {
> -			CERROR("Failed to create cptab from pattern %s\n",
> -			       cpu_pattern);
> -			goto failed;
> -		}
> -
> -	} else {
> -		cfs_cpt_table = cfs_cpt_table_create(cpu_npartitions);
> -		if (!cfs_cpt_table) {
> -			CERROR("Failed to create ptable with npartitions %d\n",
> -			       cpu_npartitions);
> -			goto failed;
> -		}
> -	}
> -
> -	spin_lock(&cpt_data.cpt_lock);
> -	if (cfs_cpt_table->ctb_version != cpt_data.cpt_version) {
> -		spin_unlock(&cpt_data.cpt_lock);
> -		CERROR("CPU hotplug/unplug during setup\n");
> -		goto failed;
> -	}
> -	spin_unlock(&cpt_data.cpt_lock);
> -
> -	LCONSOLE(0, "HW nodes: %d, HW CPU cores: %d, npartitions: %d\n",
> -		 num_online_nodes(), num_online_cpus(),
> -		 cfs_cpt_number(cfs_cpt_table));
> -	return 0;
> -
> - failed:
> -	cfs_cpu_fini();
> -	return ret;
> -}
> -
> -#endif
> 
> 
> 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ