linux-kernel - [PATCH v2 2/2] hv_balloon: Enable hot-add for memblock sizes > 128 MiB

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20240501151458.2807-2-mhklinux@outlook.com>
Date: Wed,  1 May 2024 08:14:58 -0700
From: mhkelley58@...il.com
To: haiyangz@...rosoft.com,
	wei.liu@...nel.org,
	decui@...rosoft.com,
	linux-kernel@...r.kernel.org,
	linux-hyperv@...r.kernel.org
Cc: david@...hat.com
Subject: [PATCH v2 2/2] hv_balloon: Enable hot-add for memblock sizes > 128 MiB

From: Michael Kelley <mhklinux@...look.com>

The Hyper-V balloon driver supports hot-add of memory in addition
to ballooning. Current code hot-adds in fixed size chunks of
128 MiB (fixed constant HA_CHUNK in the code). While this works
in Hyper-V VMs with 64 GiB or less of memory where the Linux
memblock size is 128 MiB, the hot-add fails for larger memblock
sizes because add_memory() expects memory to be added in chunks
that match the memblock size. Messages like the following are
reported when Linux has a 256 MiB memblock size:

[  312.668859] Block size [0x10000000] unaligned hotplug range:
               start 0x310000000, size 0x8000000
[  312.668880] hv_balloon: hot_add memory failed error is -22
[  312.668984] hv_balloon: Memory hot add failed

Larger memblock sizes are usually used in VMs with more than
64 GiB of memory, depending on the alignment of the VM's
physical address space.

Fix this problem by having the Hyper-V balloon driver determine
the Linux memblock size, and process hot-add requests in that
chunk size instead of a fixed 128 MiB. Also update the hot-add
alignment requested of the Hyper-V host to match the memblock
size.

The code changes look significant, but in fact are just a
simple text substitution of a new global variable for the
previous HA_CHUNK constant. No algorithms are changed except
to initialize the new global variable and to calculate the
alignment value to pass to Hyper-V. Testing with memblock
sizes of 256 MiB and 2 GiB shows correct operation.

Signed-off-by: Michael Kelley <mhklinux@...look.com>
---
Changes in v2:
* Change new global variable name from ha_chunk_pgs to
  ha_pages_in_chunk [David Hildenbrand]
* Use kernel macros ALIGN(), ALIGN_DOWN(), and umin()
  to simplify code and reduce references to HA_CHUNK. For
  ease of review, this is done in a new patch preceding
  this one. [David Hildenbrand]

 drivers/hv/hv_balloon.c | 55 +++++++++++++++++++++++++----------------
 1 file changed, 34 insertions(+), 21 deletions(-)

diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c
index 9f45b8a6762c..e0a1a18041ca 100644
--- a/drivers/hv/hv_balloon.c
+++ b/drivers/hv/hv_balloon.c
@@ -425,11 +425,11 @@ struct dm_info_msg {
  * The range start_pfn : end_pfn specifies the range
  * that the host has asked us to hot add. The range
  * start_pfn : ha_end_pfn specifies the range that we have
- * currently hot added. We hot add in multiples of 128M
- * chunks; it is possible that we may not be able to bring
- * online all the pages in the region. The range
+ * currently hot added. We hot add in chunks equal to the
+ * memory block size; it is possible that we may not be able
+ * to bring online all the pages in the region. The range
  * covered_start_pfn:covered_end_pfn defines the pages that can
- * be brough online.
+ * be brought online.
  */
 
 struct hv_hotadd_state {
@@ -505,8 +505,9 @@ enum hv_dm_state {
 
 static __u8 recv_buffer[HV_HYP_PAGE_SIZE];
 static __u8 balloon_up_send_buffer[HV_HYP_PAGE_SIZE];
+static unsigned long ha_pages_in_chunk;
+
 #define PAGES_IN_2M (2 * 1024 * 1024 / PAGE_SIZE)
-#define HA_CHUNK (128 * 1024 * 1024 / PAGE_SIZE)
 
 struct hv_dynmem_device {
 	struct hv_device *dev;
@@ -724,21 +725,21 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size,
 	unsigned long processed_pfn;
 	unsigned long total_pfn = pfn_count;
 
-	for (i = 0; i < (size/HA_CHUNK); i++) {
-		start_pfn = start + (i * HA_CHUNK);
+	for (i = 0; i < (size/ha_pages_in_chunk); i++) {
+		start_pfn = start + (i * ha_pages_in_chunk);
 
 		scoped_guard(spinlock_irqsave, &dm_device.ha_lock) {
-			has->ha_end_pfn +=  HA_CHUNK;
-			processed_pfn = umin(total_pfn, HA_CHUNK);
+			has->ha_end_pfn += ha_pages_in_chunk;
+			processed_pfn = umin(total_pfn, ha_pages_in_chunk);
 			total_pfn -= processed_pfn;
-			has->covered_end_pfn +=  processed_pfn;
+			has->covered_end_pfn += processed_pfn;
 		}
 
 		reinit_completion(&dm_device.ol_waitevent);
 
 		nid = memory_add_physaddr_to_nid(PFN_PHYS(start_pfn));
 		ret = add_memory(nid, PFN_PHYS((start_pfn)),
-				(HA_CHUNK << PAGE_SHIFT), MHP_MERGE_RESOURCE);
+				(ha_pages_in_chunk << PAGE_SHIFT), MHP_MERGE_RESOURCE);
 
 		if (ret) {
 			pr_err("hot_add memory failed error is %d\n", ret);
@@ -753,7 +754,7 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size,
 				do_hot_add = false;
 			}
 			scoped_guard(spinlock_irqsave, &dm_device.ha_lock) {
-				has->ha_end_pfn -= HA_CHUNK;
+				has->ha_end_pfn -= ha_pages_in_chunk;
 				has->covered_end_pfn -=  processed_pfn;
 			}
 			break;
@@ -829,9 +830,9 @@ static int pfn_covered(unsigned long start_pfn, unsigned long pfn_cnt)
 		 * our current limit; extend it.
 		 */
 		if ((start_pfn + pfn_cnt) > has->end_pfn) {
-			/* Extend the region by multiples of HA_CHUNK */
+			/* Extend the region by multiples of ha_pages_in_chunk */
 			residual = (start_pfn + pfn_cnt - has->end_pfn);
-			has->end_pfn += ALIGN(residual, HA_CHUNK);
+			has->end_pfn += ALIGN(residual, ha_pages_in_chunk);
 		}
 
 		ret = 1;
@@ -897,12 +898,12 @@ static unsigned long handle_pg_range(unsigned long pg_start,
 			 * We have some residual hot add range
 			 * that needs to be hot added; hot add
 			 * it now. Hot add a multiple of
-			 * HA_CHUNK that fully covers the pages
+			 * ha_pages_in_chunk that fully covers the pages
 			 * we have.
 			 */
 			size = (has->end_pfn - has->ha_end_pfn);
 			if (pfn_cnt <= size) {
-				size = ALIGN(pfn_cnt, HA_CHUNK);
+				size = ALIGN(pfn_cnt, ha_pages_in_chunk);
 			} else {
 				pfn_cnt = size;
 			}
@@ -1003,8 +1004,8 @@ static void hot_add_req(struct work_struct *dummy)
 		 * that need to be hot-added while ensuring the alignment
 		 * and size requirements of Linux as it relates to hot-add.
 		 */
-		rg_start = ALIGN_DOWN(pg_start, HA_CHUNK);
-		rg_sz = ALIGN(pfn_cnt, HA_CHUNK);
+		rg_start = ALIGN_DOWN(pg_start, ha_pages_in_chunk);
+		rg_sz = ALIGN(pfn_cnt, ha_pages_in_chunk);
 	}
 
 	if (do_hot_add)
@@ -1807,10 +1808,13 @@ static int balloon_connect_vsp(struct hv_device *dev)
 	cap_msg.caps.cap_bits.hot_add = hot_add_enabled();
 
 	/*
-	 * Specify our alignment requirements as it relates
-	 * memory hot-add. Specify 128MB alignment.
+	 * Specify our alignment requirements for memory hot-add. The value is
+	 * the log base 2 of the number of megabytes in a chunk. For example,
+	 * with 256 MiB chunks, the value is 8. The number of MiB in a chunk
+	 * must be a power of 2.
 	 */
-	cap_msg.caps.cap_bits.hot_add_alignment = 7;
+	cap_msg.caps.cap_bits.hot_add_alignment =
+			ilog2(ha_pages_in_chunk >> (20 - PAGE_SHIFT));
 
 	/*
 	 * Currently the host does not use these
@@ -2132,6 +2136,15 @@ static  struct hv_driver balloon_drv = {
 
 static int __init init_balloon_drv(void)
 {
+	/*
+	 * Hot-add must operate in chunks that are of size
+	 * equal to the memory block size because that's
+	 * what the core add_memory() interface requires.
+	 * The Hyper-V interface requires that the memory block
+	 * size be a power of 2, which is guaranteed by the
+	 * check in memory_dev_init().
+	 */
+	ha_pages_in_chunk = memory_block_size_bytes() / PAGE_SIZE;
 
 	return vmbus_driver_register(&balloon_drv);
 }
-- 
2.25.1