lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250408135202.3001047-1-joshua.hahnjy@gmail.com>
Date: Tue,  8 Apr 2025 06:52:01 -0700
From: Joshua Hahn <joshua.hahnjy@...il.com>
To: Rakie Kim <rakie.kim@...com>
Cc: akpm@...ux-foundation.org,
	gourry@...rry.net,
	linux-mm@...ck.org,
	linux-kernel@...r.kernel.org,
	linux-cxl@...r.kernel.org,
	joshua.hahnjy@...il.com,
	dan.j.williams@...el.com,
	ying.huang@...ux.alibaba.com,
	david@...hat.com,
	Jonathan.Cameron@...wei.com,
	osalvador@...e.de,
	kernel_team@...ynix.com,
	honggyu.kim@...com,
	yunjeong.mun@...com
Subject: Re: [PATCH v7 3/3] mm/mempolicy: Support memory hotplug in weighted interleave

On Tue,  8 Apr 2025 16:32:42 +0900 Rakie Kim <rakie.kim@...com> wrote:

Hi Rakie,

Looks good to me as well :-) Thank you for working on this!

Reviewed-by: Joshua Hahn <joshua.hahnjy@...il.com>

> The weighted interleave policy distributes page allocations across multiple
> NUMA nodes based on their performance weight, thereby improving memory
> bandwidth utilization. The weight values for each node are configured
> through sysfs.
> 
> Previously, sysfs entries for configuring weighted interleave were created
> for all possible nodes (N_POSSIBLE) at initialization, including nodes that
> might not have memory. However, not all nodes in N_POSSIBLE are usable at
> runtime, as some may remain memoryless or offline.
> This led to sysfs entries being created for unusable nodes, causing
> potential misconfiguration issues.
> 
> To address this issue, this patch modifies the sysfs creation logic to:
> 1) Limit sysfs entries to nodes that are online and have memory, avoiding
>    the creation of sysfs entries for nodes that cannot be used.
> 2) Support memory hotplug by dynamically adding and removing sysfs entries
>    based on whether a node transitions into or out of the N_MEMORY state.
> 
> Additionally, the patch ensures that sysfs attributes are properly managed
> when nodes go offline, preventing stale or redundant entries from persisting
> in the system.
> 
> By making these changes, the weighted interleave policy now manages its
> sysfs entries more efficiently, ensuring that only relevant nodes are
> considered for interleaving, and dynamically adapting to memory hotplug
> events.
> 
> Signed-off-by: Rakie Kim <rakie.kim@...com>
> Signed-off-by: Honggyu Kim <honggyu.kim@...com>
> Signed-off-by: Yunjeong Mun <yunjeong.mun@...com>
> Reviewed-by: Oscar Salvador <osalvador@...e.de>
> ---
>  mm/mempolicy.c | 106 ++++++++++++++++++++++++++++++++++++++-----------
>  1 file changed, 83 insertions(+), 23 deletions(-)
> 
> diff --git a/mm/mempolicy.c b/mm/mempolicy.c
> index 988575f29c53..9aa884107f4c 100644
> --- a/mm/mempolicy.c
> +++ b/mm/mempolicy.c
> @@ -113,6 +113,7 @@
>  #include <asm/tlbflush.h>
>  #include <asm/tlb.h>
>  #include <linux/uaccess.h>
> +#include <linux/memory.h>
>  
>  #include "internal.h"
>  
> @@ -3421,6 +3422,7 @@ struct iw_node_attr {
>  
>  struct sysfs_wi_group {
>  	struct kobject wi_kobj;
> +	struct mutex kobj_lock;
>  	struct iw_node_attr *nattrs[];
>  };
>  
> @@ -3470,13 +3472,24 @@ static ssize_t node_store(struct kobject *kobj, struct kobj_attribute *attr,
>  
>  static void sysfs_wi_node_delete(int nid)
>  {
> -	if (!wi_group->nattrs[nid])
> +	struct iw_node_attr *attr;
> +
> +	if (nid < 0 || nid >= nr_node_ids)
> +		return;
> +
> +	mutex_lock(&wi_group->kobj_lock);
> +	attr = wi_group->nattrs[nid];
> +	if (!attr) {
> +		mutex_unlock(&wi_group->kobj_lock);
>  		return;
> +	}
> +
> +	wi_group->nattrs[nid] = NULL;
> +	mutex_unlock(&wi_group->kobj_lock);
>  
> -	sysfs_remove_file(&wi_group->wi_kobj,
> -			  &wi_group->nattrs[nid]->kobj_attr.attr);
> -	kfree(wi_group->nattrs[nid]->kobj_attr.attr.name);
> -	kfree(wi_group->nattrs[nid]);
> +	sysfs_remove_file(&wi_group->wi_kobj, &attr->kobj_attr.attr);
> +	kfree(attr->kobj_attr.attr.name);
> +	kfree(attr);
>  }
>  
>  static void sysfs_wi_release(struct kobject *wi_kobj)
> @@ -3495,35 +3508,77 @@ static const struct kobj_type wi_ktype = {
>  
>  static int sysfs_wi_node_add(int nid)
>  {
> -	struct iw_node_attr *node_attr;
> +	int ret = 0;
>  	char *name;
> +	struct iw_node_attr *new_attr = NULL;
>  
> -	node_attr = kzalloc(sizeof(*node_attr), GFP_KERNEL);
> -	if (!node_attr)
> +	if (nid < 0 || nid >= nr_node_ids) {
> +		pr_err("Invalid node id: %d\n", nid);
> +		return -EINVAL;
> +	}
> +
> +	new_attr = kzalloc(sizeof(struct iw_node_attr), GFP_KERNEL);
> +	if (!new_attr)
>  		return -ENOMEM;
>  
>  	name = kasprintf(GFP_KERNEL, "node%d", nid);
>  	if (!name) {
> -		kfree(node_attr);
> +		kfree(new_attr);
>  		return -ENOMEM;
>  	}
>  
> -	sysfs_attr_init(&node_attr->kobj_attr.attr);
> -	node_attr->kobj_attr.attr.name = name;
> -	node_attr->kobj_attr.attr.mode = 0644;
> -	node_attr->kobj_attr.show = node_show;
> -	node_attr->kobj_attr.store = node_store;
> -	node_attr->nid = nid;
> +	mutex_lock(&wi_group->kobj_lock);
> +	if (wi_group->nattrs[nid]) {
> +		mutex_unlock(&wi_group->kobj_lock);
> +		pr_info("Node [%d] already exists\n", nid);
> +		kfree(new_attr);
> +		kfree(name);
> +		return 0;
> +	}
> +	wi_group->nattrs[nid] = new_attr;
>  
> -	if (sysfs_create_file(&wi_group->wi_kobj, &node_attr->kobj_attr.attr)) {
> -		kfree(node_attr->kobj_attr.attr.name);
> -		kfree(node_attr);
> -		pr_err("failed to add attribute to weighted_interleave\n");
> -		return -ENOMEM;
> +	sysfs_attr_init(&wi_group->nattrs[nid]->kobj_attr.attr);
> +	wi_group->nattrs[nid]->kobj_attr.attr.name = name;
> +	wi_group->nattrs[nid]->kobj_attr.attr.mode = 0644;
> +	wi_group->nattrs[nid]->kobj_attr.show = node_show;
> +	wi_group->nattrs[nid]->kobj_attr.store = node_store;
> +	wi_group->nattrs[nid]->nid = nid;
> +
> +	ret = sysfs_create_file(&wi_group->wi_kobj,
> +				&wi_group->nattrs[nid]->kobj_attr.attr);
> +	if (ret) {
> +		kfree(wi_group->nattrs[nid]->kobj_attr.attr.name);
> +		kfree(wi_group->nattrs[nid]);
> +		wi_group->nattrs[nid] = NULL;
> +		pr_err("Failed to add attribute to weighted_interleave: %d\n", ret);
>  	}
> +	mutex_unlock(&wi_group->kobj_lock);
>  
> -	wi_group->nattrs[nid] = node_attr;
> -	return 0;
> +	return ret;
> +}
> +
> +static int wi_node_notifier(struct notifier_block *nb,
> +			       unsigned long action, void *data)
> +{
> +	int err;
> +	struct memory_notify *arg = data;
> +	int nid = arg->status_change_nid;
> +
> +	if (nid < 0)
> +		return NOTIFY_OK;
> +
> +	switch(action) {
> +	case MEM_ONLINE:
> +		err = sysfs_wi_node_add(nid);
> +		if (err)
> +			pr_err("failed to add sysfs [node%d]\n", nid);
> +		break;
> +	case MEM_OFFLINE:
> +		sysfs_wi_node_delete(nid);
> +		break;
> +	}
> +
> +	return NOTIFY_OK;
>  }
>  
>  static int __init add_weighted_interleave_group(struct kobject *mempolicy_kobj)
> @@ -3534,13 +3589,17 @@ static int __init add_weighted_interleave_group(struct kobject *mempolicy_kobj)
>  			   GFP_KERNEL);
>  	if (!wi_group)
>  		return -ENOMEM;
> +	mutex_init(&wi_group->kobj_lock);
>  
>  	err = kobject_init_and_add(&wi_group->wi_kobj, &wi_ktype, mempolicy_kobj,
>  				   "weighted_interleave");
>  	if (err)
>  		goto err_put_kobj;
>  
> -	for_each_node_state(nid, N_POSSIBLE) {
> +	for_each_online_node(nid) {
> +		if (!node_state(nid, N_MEMORY))
> +			continue;
> +
>  		err = sysfs_wi_node_add(nid);
>  		if (err) {
>  			pr_err("failed to add sysfs [node%d]\n", nid);
> @@ -3548,6 +3607,7 @@ static int __init add_weighted_interleave_group(struct kobject *mempolicy_kobj)
>  		}
>  	}
>  
> +	hotplug_memory_notifier(wi_node_notifier, DEFAULT_CALLBACK_PRI);
>  	return 0;
>  
>  err_del_kobj:
> -- 
> 2.34.1

Sent using hkml (https://github.com/sjp38/hackermail)


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ