lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <aV0kzBxg57Mlw8mx@tiehlicka>
Date: Tue, 6 Jan 2026 16:05:48 +0100
From: Michal Hocko <mhocko@...e.com>
To: Gregory Price <gourry@...rry.net>
Cc: linux-mm@...ck.org, linux-kernel@...r.kernel.org, kernel-team@...a.com,
	david@...hat.com, osalvador@...e.de, gregkh@...uxfoundation.org,
	rafael@...nel.org, dakr@...nel.org, akpm@...ux-foundation.org,
	lorenzo.stoakes@...cle.com, Liam.Howlett@...cle.com, vbabka@...e.cz,
	rppt@...nel.org, surenb@...gle.com, hare@...e.de
Subject: Re: [RFC PATCH] memory,memory_hotplug: allow restricting memory
 blocks to zone movable

On Mon 05-01-26 15:36:11, Gregory Price wrote:
> It was reported (LPC 2025) that userland services which monitor memory
> blocks can cause hot-unplug to fail permanently.
>
> This can occur when drivers attempt to hot-remove memory in two phases
> (offline, remove), while a userland service detects the memory offline
> and re-onlines the memory into a zone which may prevent removal.

Are there more details about this?

> This patch allows a driver to specify that a given memory block is
> intended as ZONE_MOVABLE memory only (i.e. the system should try to
> protect its hot-unpluggability). This is done via an MHP flag and a new
> "movable_only" bool in `struct memory_block`.
> 
> Attempts to online a memory block with movable_only=true with any value
> other than MMOP_ONLINE_MOVABLE will fail with -EINVAL.
> 
> It is hard to catch all possible ways to implement the offline/remove
> process, so a race condition here can clearly still occur if the
> userland service onlines the memory back into ZONE_MOVABLE, but it at
> least will not prevent the removal of a block at a later time.

Irrespective of the userspace note above (which seems like a policy that
should probably be re-evaluated or allow for finer tuning), I can
see some sense in drivers having better control over which zones (kernel
vs. movable) their managed memory can fall into.

That being said, rather than movable_only, should we have a mask of
online types supported for the mem block?

> Suggested-by: Hannes Reinecke <hare@...e.de>
> Signed-off-by: Gregory Price <gourry@...rry.net>
> ---
>  drivers/base/memory.c          | 15 +++++++++++----
>  include/linux/memory.h         |  4 +++-
>  include/linux/memory_hotplug.h | 13 +++++++++++++
>  mm/memory_hotplug.c            | 12 +++++++++---
>  4 files changed, 36 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/base/memory.c b/drivers/base/memory.c
> index 6d84a02cfa5d..59512e4b8d62 100644
> --- a/drivers/base/memory.c
> +++ b/drivers/base/memory.c
> @@ -374,6 +374,8 @@ static int memory_block_change_state(struct memory_block *mem,
>  
>  	if (to_state == MEM_OFFLINE)
>  		mem->state = MEM_GOING_OFFLINE;
> +	else if (mem->movable_only && to_state != MMOP_ONLINE_MOVABLE)
> +		return -EINVAL;
>  
>  	ret = memory_block_action(mem, to_state);
>  	mem->state = ret ? from_state_req : to_state;
> @@ -811,7 +813,8 @@ void memory_block_add_nid_early(struct memory_block *mem, int nid)
>  
>  static int add_memory_block(unsigned long block_id, int nid, unsigned long state,
>  			    struct vmem_altmap *altmap,
> -			    struct memory_group *group)
> +			    struct memory_group *group,
> +			    bool movable_only)
>  {
>  	struct memory_block *mem;
>  	int ret = 0;
> @@ -829,6 +832,7 @@ static int add_memory_block(unsigned long block_id, int nid, unsigned long state
>  	mem->state = state;
>  	mem->nid = nid;
>  	mem->altmap = altmap;
> +	mem->movable_only = movable_only;
>  	INIT_LIST_HEAD(&mem->group_next);
>  
>  #ifndef CONFIG_NUMA
> @@ -880,7 +884,8 @@ static void remove_memory_block(struct memory_block *memory)
>   */
>  int create_memory_block_devices(unsigned long start, unsigned long size,
>  				int nid, struct vmem_altmap *altmap,
> -				struct memory_group *group)
> +				struct memory_group *group,
> +				bool movable_only)
>  {
>  	const unsigned long start_block_id = pfn_to_block_id(PFN_DOWN(start));
>  	unsigned long end_block_id = pfn_to_block_id(PFN_DOWN(start + size));
> @@ -893,7 +898,8 @@ int create_memory_block_devices(unsigned long start, unsigned long size,
>  		return -EINVAL;
>  
>  	for (block_id = start_block_id; block_id != end_block_id; block_id++) {
> -		ret = add_memory_block(block_id, nid, MEM_OFFLINE, altmap, group);
> +		ret = add_memory_block(block_id, nid, MEM_OFFLINE, altmap, group,
> +				       movable_only);
>  		if (ret)
>  			break;
>  	}
> @@ -998,7 +1004,8 @@ void __init memory_dev_init(void)
>  			continue;
>  
>  		block_id = memory_block_id(nr);
> -		ret = add_memory_block(block_id, NUMA_NO_NODE, MEM_ONLINE, NULL, NULL);
> +		ret = add_memory_block(block_id, NUMA_NO_NODE, MEM_ONLINE, NULL, NULL,
> +				       false);
>  		if (ret) {
>  			panic("%s() failed to add memory block: %d\n",
>  			      __func__, ret);
> diff --git a/include/linux/memory.h b/include/linux/memory.h
> index 43d378038ce2..bab24f796d3d 100644
> --- a/include/linux/memory.h
> +++ b/include/linux/memory.h
> @@ -80,6 +80,7 @@ struct memory_block {
>  	struct vmem_altmap *altmap;
>  	struct memory_group *group;	/* group (if any) for this block */
>  	struct list_head group_next;	/* next block inside memory group */
> +	bool movable_only;		/* If set, only ZONE_MOVABLE is valid */
>  #if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_MEMORY_HOTPLUG)
>  	atomic_long_t nr_hwpoison;
>  #endif
> @@ -160,7 +161,8 @@ extern int register_memory_notifier(struct notifier_block *nb);
>  extern void unregister_memory_notifier(struct notifier_block *nb);
>  int create_memory_block_devices(unsigned long start, unsigned long size,
>  				int nid, struct vmem_altmap *altmap,
> -				struct memory_group *group);
> +				struct memory_group *group,
> +				bool movable_only);
>  void remove_memory_block_devices(unsigned long start, unsigned long size);
>  extern void memory_dev_init(void);
>  extern int memory_notify(unsigned long val, void *v);
> diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
> index 23f038a16231..ca51ef2ad0cf 100644
> --- a/include/linux/memory_hotplug.h
> +++ b/include/linux/memory_hotplug.h
> @@ -75,6 +75,19 @@ typedef int __bitwise mhp_t;
>   */
>  #define MHP_OFFLINE_INACCESSIBLE	((__force mhp_t)BIT(3))
>  
> +/*
> + * Restrict hotplugged memory blocks to ZONE_MOVABLE only.
> + *
> + * During offlining of hotplugged memory which was originally onlined
> + * as ZONE_MOVABLE, userland services may detect blocks going offline
> + * and automatically re-online them into ZONE_NORMAL or lower.  When
> + * this happens the memory may become permanently incapable of being removed.
> + *
> + * Allow driver-managed memory sources to restrict memory blocks to
> + * ZONE_MOVABLE only, so that the truly degenerate case can be mitigated.
> + */
> +#define MHP_MOVABLE_ONLY		((__force mhp_t)BIT(4))
> +
>  /*
>   * Extended parameters for memory hotplug:
>   * altmap: alternative allocator for memmap array (optional)
> diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
> index 81ba5b019926..1a184bfd87f6 100644
> --- a/mm/memory_hotplug.c
> +++ b/mm/memory_hotplug.c
> @@ -1346,7 +1346,9 @@ static int check_hotplug_memory_range(u64 start, u64 size)
>  
>  static int online_memory_block(struct memory_block *mem, void *arg)
>  {
> -	mem->online_type = mhp_get_default_online_type();
> +	mem->online_type = mem->movable_only ?
> +			   MMOP_ONLINE_MOVABLE :
> +			   mhp_get_default_online_type();
>  	return device_online(&mem->dev);
>  }
>  
> @@ -1449,6 +1451,7 @@ static int create_altmaps_and_memory_blocks(int nid, struct memory_group *group,
>  	unsigned long memblock_size = memory_block_size_bytes();
>  	u64 cur_start;
>  	int ret;
> +	bool movable_only = mhp_flags & MHP_MOVABLE_ONLY;
>  
>  	for (cur_start = start; cur_start < start + size;
>  	     cur_start += memblock_size) {
> @@ -1478,7 +1481,8 @@ static int create_altmaps_and_memory_blocks(int nid, struct memory_group *group,
>  
>  		/* create memory block devices after memory was added */
>  		ret = create_memory_block_devices(cur_start, memblock_size, nid,
> -						  params.altmap, group);
> +						  params.altmap, group,
> +						  movable_only);
>  		if (ret) {
>  			arch_remove_memory(cur_start, memblock_size, NULL);
>  			kfree(params.altmap);
> @@ -1506,6 +1510,7 @@ int add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags)
>  	struct memory_group *group = NULL;
>  	u64 start, size;
>  	bool new_node = false;
> +	bool movable_only = mhp_flags & MHP_MOVABLE_ONLY;
>  	int ret;
>  
>  	start = res->start;
> @@ -1564,7 +1569,8 @@ int add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags)
>  			goto error;
>  
>  		/* create memory block devices after memory was added */
> -		ret = create_memory_block_devices(start, size, nid, NULL, group);
> +		ret = create_memory_block_devices(start, size, nid, NULL, group,
> +						  movable_only);
>  		if (ret) {
>  			arch_remove_memory(start, size, params.altmap);
>  			goto error;
> -- 
> 2.52.0

-- 
Michal Hocko
SUSE Labs

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ