Message-ID: <aIcxs2nk3RNWWbD6@localhost.localdomain>
Date: Mon, 28 Jul 2025 10:15:47 +0200
From: Oscar Salvador <osalvador@...e.de>
To: david@...hat.com
Cc: linux-mm@...ck.org, linux-kernel@...r.kernel.org,
	Michal Hocko <mhocko@...e.com>, Hannes Reinecke <hare@...nel.org>
Subject: [RFC] Disable auto_movable_ratio for selfhosted memmap

Hi,

Currently, we have several mechanisms to pick a zone for new memory we are
onlining.
Eventually, we land in zone_for_pfn_range(), which picks the zone.

Two of these mechanisms are the 'movable_node' and 'auto-movable' policies.
The former puts every single hotplugged memory block in ZONE_MOVABLE
(unless we can keep zones contiguous by not doing so), while the latter
puts it in ZONE_MOVABLE only if we stay within the established
MOVABLE:KERNEL ratio.
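
For reference, the ratio check boils down to something like the following
simplified sketch of auto_movable_can_online_movable() in
mm/memory_hotplug.c (dynamic memory-group and NUMA accounting omitted):

 /* Default: up to 301 MOVABLE pages per 100 KERNEL pages. */
 static unsigned int auto_movable_ratio = 301;

 /*
  * Simplified sketch: can nr_pages be onlined to ZONE_MOVABLE while
  * staying within the configured MOVABLE:KERNEL ratio?
  */
 static bool can_online_movable(unsigned long kernel_early_pages,
 			       unsigned long movable_pages,
 			       unsigned long nr_pages)
 {
 	movable_pages += nr_pages;
 	return movable_pages <= (auto_movable_ratio * kernel_early_pages) / 100;
 }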

It seems the latter doesn't play well with CXL memory: CXL cards can hold
really large amounts of memory, making the ratio check fail, and since a
CXL card must be removed as a unit, removal becomes impossible once any of
its memory blocks was onlined to a !ZONE_MOVABLE zone.
(For instance, with the default auto_movable_ratio of 301, a node with
64 GiB of kernel memory allows roughly 192 GiB of ZONE_MOVABLE, which a
single large CXL card can easily exceed.)

One way to tackle this would be to update the ratio (e.g. via the
memory_hotplug.auto_movable_ratio parameter) every time a new CXL card
gets inserted, but this seems suboptimal.
Another way: since CXL memory works with a selfhosted memmap, we could
relax the check under the 'auto-movable' policy and only look at the
ratio when we are not working with a selfhosted memmap.

Something like the following (achtung: it's just a PoC).
Comments? Ideas?

 diff --git a/drivers/base/memory.c b/drivers/base/memory.c
 index 5c6c1d6bb59f..ff87cfb3881a 100644
 --- a/drivers/base/memory.c
 +++ b/drivers/base/memory.c
 @@ -234,7 +234,7 @@ static int memory_block_online(struct memory_block *mem)
  		return -EHWPOISON;
 
  	zone = zone_for_pfn_range(mem->online_type, mem->nid, mem->group,
 -				  start_pfn, nr_pages);
 +				  start_pfn, nr_pages, mem->altmap);
 
  	/*
  	 * Although vmemmap pages have a different lifecycle than the pages
 @@ -473,11 +473,11 @@ static ssize_t phys_device_show(struct device *dev,
  static int print_allowed_zone(char *buf, int len, int nid,
  			      struct memory_group *group,
  			      unsigned long start_pfn, unsigned long nr_pages,
 -			      int online_type, struct zone *default_zone)
 +			      int online_type, struct zone *default_zone, struct vmem_altmap *altmap)
  {
  	struct zone *zone;
 
 -	zone = zone_for_pfn_range(online_type, nid, group, start_pfn, nr_pages);
 +	zone = zone_for_pfn_range(online_type, nid, group, start_pfn, nr_pages, altmap);
  	if (zone == default_zone)
  		return 0;
 
 @@ -509,13 +509,13 @@ static ssize_t valid_zones_show(struct device *dev,
  	}
 
  	default_zone = zone_for_pfn_range(MMOP_ONLINE, nid, group,
 -					  start_pfn, nr_pages);
 +					  start_pfn, nr_pages, mem->altmap);
 
  	len = sysfs_emit(buf, "%s", default_zone->name);
  	len += print_allowed_zone(buf, len, nid, group, start_pfn, nr_pages,
 -				  MMOP_ONLINE_KERNEL, default_zone);
 +				  MMOP_ONLINE_KERNEL, default_zone, mem->altmap);
  	len += print_allowed_zone(buf, len, nid, group, start_pfn, nr_pages,
 -				  MMOP_ONLINE_MOVABLE, default_zone);
 +				  MMOP_ONLINE_MOVABLE, default_zone, mem->altmap);
  	len += sysfs_emit_at(buf, len, "\n");
  	return len;
  }
 diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
 index 23f038a16231..89f7b9c5d995 100644
 --- a/include/linux/memory_hotplug.h
 +++ b/include/linux/memory_hotplug.h
 @@ -328,7 +328,7 @@ extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map,
  					  unsigned long pnum);
  extern struct zone *zone_for_pfn_range(int online_type, int nid,
  		struct memory_group *group, unsigned long start_pfn,
 -		unsigned long nr_pages);
 +		unsigned long nr_pages, struct vmem_altmap *altmap);
  extern int arch_create_linear_mapping(int nid, u64 start, u64 size,
  				      struct mhp_params *params);
  void arch_remove_linear_mapping(u64 start, u64 size);
 diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
 index 69a636e20f7b..6c6600a9c839 100644
 --- a/mm/memory_hotplug.c
 +++ b/mm/memory_hotplug.c
 @@ -1048,7 +1048,7 @@ static inline struct zone *default_zone_for_pfn(int nid, unsigned long start_pfn
 
  struct zone *zone_for_pfn_range(int online_type, int nid,
  		struct memory_group *group, unsigned long start_pfn,
 -		unsigned long nr_pages)
 +		unsigned long nr_pages, struct vmem_altmap *altmap)
  {
  	if (online_type == MMOP_ONLINE_KERNEL)
  		return default_kernel_zone_for_pfn(nid, start_pfn, nr_pages);
 @@ -1056,6 +1056,10 @@ struct zone *zone_for_pfn_range(int online_type, int nid,
  	if (online_type == MMOP_ONLINE_MOVABLE)
  		return &NODE_DATA(nid)->node_zones[ZONE_MOVABLE];
 
 +	/* Selfhosted memmap, skip ratio check */
 +	if (online_policy == ONLINE_POLICY_AUTO_MOVABLE && altmap)
 +		return &NODE_DATA(nid)->node_zones[ZONE_MOVABLE];
 +
  	if (online_policy == ONLINE_POLICY_AUTO_MOVABLE)
  		return auto_movable_zone_for_pfn(nid, group, start_pfn, nr_pages);
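
For completeness, here is a made-up illustration (not part of the PoC; the
function and resource names are invented, but dax/kmem does something
similar) of how memory ends up with a selfhosted memmap in the first place:
a driver adds the range with MHP_MEMMAP_ON_MEMORY, which makes the memmap
be allocated from the hotplugged range itself, and that is what sets
mem->altmap on the resulting memory blocks:

 #include <linux/memory_hotplug.h>

 /*
  * Hypothetical example: request that the memmap for this range be
  * allocated from the range itself (selfhosted memmap). This is what
  * populates mem->altmap, the signal the PoC above keys off.
  */
 static int example_add_cxl_range(int nid, u64 start, u64 size)
 {
 	return add_memory_driver_managed(nid, start, size,
 					 "System RAM (cxl-example)",
 					 MHP_MEMMAP_ON_MEMORY);
 }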

-- 
Oscar Salvador
SUSE Labs
