Message-Id: <20200107130950.2983-8-Tianyu.Lan@microsoft.com>
Date: Tue, 7 Jan 2020 21:09:47 +0800
From: lantianyu1986@...il.com
To: kys@...rosoft.com, haiyangz@...rosoft.com, sthemmin@...rosoft.com,
sashal@...nel.org, michael.h.kelley@...rosoft.com, david@...hat.com
Cc: Tianyu Lan <Tianyu.Lan@...rosoft.com>,
linux-hyperv@...r.kernel.org, linux-kernel@...r.kernel.org,
vkuznets@...hat.com, eric.devolder@...cle.com
Subject: [RFC PATCH V2 7/10] x86/Hyper-V/Balloon: Handle mem hot-remove request
From: Tianyu Lan <Tianyu.Lan@...rosoft.com>
The Linux memory hot-plug unit is 128MB, and the requested page
count may not be aligned with that unit. The non-aligned case
will be handled later.
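(For scale: assuming 4K pages on x86, one 128MB unit is 32768
pages, so a request of, say, 50000 pages leaves a 17232-page
remainder that this version does not yet handle.)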
Handle the mem hot-remove request:
First, search for memory in the ha region list. If a suitable
memory block is found, offline and remove that memory and create
an ha region "gap" struct for the range. A "gap" marks a range in
the hot-add region that has been offlined or removed. A following
mem hot-add msg may add memory back in the gap range.
If there is no suitable memory in the hot-add regions, search the
system memory on the target node and offline and remove memory
from there.
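The call flow introduced by this patch (names as defined below) is
roughly:

hot_remove_req()
  hv_mem_hot_remove()
    hv_hot_remove_from_ha_list()      <- hot-add regions first
      hv_hot_remove_range()
        offline_and_remove_memory()
      (a "gap" struct is recorded per removed range)
    hv_hot_remove_range()             <- fallback: node system memory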
Signed-off-by: Tianyu Lan <Tianyu.Lan@...rosoft.com>
---
drivers/hv/hv_balloon.c | 188 ++++++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 184 insertions(+), 4 deletions(-)
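Note for reviewers on the new MAX_HOT_REMOVE_ENTRIES macro: it caps the
number of dm_mem_page_range entries that fit in the single-page response
buffer. Below is a minimal userspace sketch of the same arithmetic; the
8-byte entry size matches the __u64-sized union dm_mem_page_range in
hv_balloon.c, while the header size is a stand-in assumption, not the
real sizeof(struct dm_hot_remove_response):

#include <stdio.h>

#define PAGE_SIZE_BYTES   4096UL /* x86 page size, matches PAGE_SIZE */
#define RESP_HEADER_BYTES 16UL   /* stand-in for sizeof(struct dm_hot_remove_response) */
#define RANGE_ENTRY_BYTES 8UL    /* sizeof(union dm_mem_page_range): one __u64 */

int main(void)
{
	/* Mirrors MAX_HOT_REMOVE_ENTRIES: ranges that fit in one page. */
	unsigned long max_entries =
		(PAGE_SIZE_BYTES - RESP_HEADER_BYTES) / RANGE_ENTRY_BYTES;

	printf("max hot-remove entries per response: %lu\n", max_entries);
	return 0;
}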
diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c
index 43e490f492d5..3d8c09fe148a 100644
--- a/drivers/hv/hv_balloon.c
+++ b/drivers/hv/hv_balloon.c
@@ -56,6 +56,10 @@
#define DYNMEM_MAJOR_VERSION(Version) ((__u32)(Version) >> 16)
#define DYNMEM_MINOR_VERSION(Version) ((__u32)(Version) & 0xff)
+#define MAX_HOT_REMOVE_ENTRIES \
+ ((PAGE_SIZE - sizeof(struct dm_hot_remove_response)) \
+ / sizeof(union dm_mem_page_range))
+
enum {
DYNMEM_PROTOCOL_VERSION_1 = DYNMEM_MAKE_VERSION(0, 3),
DYNMEM_PROTOCOL_VERSION_2 = DYNMEM_MAKE_VERSION(1, 0),
@@ -697,6 +701,7 @@ static int hv_memory_notifier(struct notifier_block *nb, unsigned long val,
{
struct memory_notify *mem = (struct memory_notify *)v;
unsigned long pfn_count;
+	int need_unlock = 0;
switch (val) {
case MEM_ONLINE:
@@ -708,7 +713,11 @@ static int hv_memory_notifier(struct notifier_block *nb, unsigned long val,
break;
case MEM_OFFLINE:
- mutex_lock(&dm_device.ha_lock);
+ if (dm_device.lock_thread != current) {
+ mutex_lock(&dm_device.ha_lock);
+ need_unlock = 1;
+ }
+
pfn_count = hv_page_offline_check(mem->start_pfn,
mem->nr_pages);
if (pfn_count <= dm_device.num_pages_onlined) {
@@ -722,7 +731,9 @@ static int hv_memory_notifier(struct notifier_block *nb, unsigned long val,
WARN_ON_ONCE(1);
dm_device.num_pages_onlined = 0;
}
- mutex_unlock(&dm_device.ha_lock);
+
+ if (need_unlock)
+ mutex_unlock(&dm_device.ha_lock);
break;
case MEM_GOING_ONLINE:
case MEM_GOING_OFFLINE:
@@ -1046,14 +1057,183 @@ static unsigned long process_hot_add(unsigned long pg_start,
return handle_pg_range(pg_start, pfn_cnt);
}
+static int hv_hot_remove_range(unsigned int nid, unsigned long start_pfn,
+ unsigned long end_pfn, unsigned long nr_pages,
+ unsigned long *request_index,
+ union dm_mem_page_range *range_array,
+ struct hv_hotadd_state *has)
+{
+ unsigned long block_pages = HA_CHUNK;
+ unsigned long rm_pages = nr_pages;
+ unsigned long pfn;
+ int ret;
+
+ for (pfn = start_pfn; pfn < end_pfn; pfn += block_pages) {
+ struct hv_hotadd_gap *gap;
+		int in_gap = 0;
+
+ if (*request_index >= MAX_HOT_REMOVE_ENTRIES) {
+ struct dm_hot_remove_response *resp =
+ (struct dm_hot_remove_response *)
+ balloon_up_send_buffer;
+
+ /* Flush out all hot-remove ranges. */
+ ret = hv_send_hot_remove_response(resp, *request_index,
+ true);
+ if (ret)
+ return ret;
+
+ /* Reset request buffer. */
+ memset(resp, 0x00, PAGE_SIZE);
+ *request_index = 0;
+ }
+
+		/*
+		 * Memory in hot-add region gaps has already been offlined
+		 * or removed, so skip a block if it overlaps with a gap.
+		 */
+ if (has) {
+ list_for_each_entry(gap, &has->gap_list, list)
+ if (!(pfn >= gap->end_pfn ||
+				      pfn + block_pages <= gap->start_pfn)) {
+ in_gap = 1;
+ break;
+ }
+
+ if (in_gap)
+ continue;
+ }
+
+ if (online_section_nr(pfn_to_section_nr(pfn))
+ && is_mem_section_removable(pfn, block_pages)) {
+ ret = offline_and_remove_memory(nid, pfn << PAGE_SHIFT,
+ block_pages << PAGE_SHIFT);
+ if (ret)
+ continue;
+
+ range_array[*request_index].finfo.start_page = pfn;
+ range_array[*request_index].finfo.page_cnt
+ = block_pages;
+
+ (*request_index)++;
+ nr_pages -= block_pages;
+
+ if (!nr_pages)
+ break;
+ }
+ }
+
+ return rm_pages - nr_pages;
+}
+
+static int hv_hot_remove_from_ha_list(unsigned int nid, unsigned long nr_pages,
+ unsigned long *request_index,
+ union dm_mem_page_range *range_array)
+{
+ struct hv_hotadd_state *has;
+ unsigned long start_pfn, end_pfn;
+ int rm_pages;
+ int old_index;
+ int ret, i;
+
+ mutex_lock(&dm_device.ha_lock);
+ dm_device.lock_thread = current;
+ list_for_each_entry(has, &dm_device.ha_region_list, list) {
+ rm_pages = min(nr_pages,
+ has->covered_end_pfn - has->start_pfn);
+ start_pfn = has->start_pfn;
+ end_pfn = has->covered_end_pfn;
+ old_index = *request_index;
+
+ if (!rm_pages || pfn_to_nid(start_pfn) != nid)
+ continue;
+
+ rm_pages = hv_hot_remove_range(nid, start_pfn, end_pfn,
+ rm_pages, request_index, range_array, has);
+ if (rm_pages < 0) {
+ ret = rm_pages;
+ goto error;
+ } else if (!rm_pages) {
+ continue;
+ }
+
+ nr_pages -= rm_pages;
+ dm_device.num_pages_added -= rm_pages;
+
+		/* Create gaps for the hot-removed regions. */
+ for (i = old_index; i < *request_index; i++) {
+ struct hv_hotadd_gap *gap;
+
+ gap = kzalloc(sizeof(struct hv_hotadd_gap), GFP_ATOMIC);
+ if (!gap) {
+				/*
+				 * Disable dm hot-plug when allocating
+				 * memory for a gap fails.
+				 */
+ ret = -ENOMEM;
+ do_hot_add = false;
+ goto error;
+ }
+
+ INIT_LIST_HEAD(&gap->list);
+ gap->start_pfn = range_array[i].finfo.start_page;
+ gap->end_pfn =
+ gap->start_pfn + range_array[i].finfo.page_cnt;
+ list_add_tail(&gap->list, &has->gap_list);
+ }
+
+ if (!nr_pages)
+ break;
+ }
+
+ ret = nr_pages;
+ error:
+ dm_device.lock_thread = NULL;
+ mutex_unlock(&dm_device.ha_lock);
+
+ return ret;
+}
+
+static void hv_mem_hot_remove(unsigned int nid, u64 nr_pages)
+{
+ struct dm_hot_remove_response *resp
+ = (struct dm_hot_remove_response *)balloon_up_send_buffer;
+ unsigned long start_pfn = node_start_pfn(nid);
+ unsigned long end_pfn = node_end_pfn(nid);
+ unsigned long request_index = 0;
+ int remain_pages;
+
+	/* TODO: Handle requests with a non-aligned page count later. */
+
+	/* Search the hot-add region list first for memory to remove. */
+ memset(resp, 0x00, PAGE_SIZE);
+ remain_pages = hv_hot_remove_from_ha_list(nid, nr_pages,
+ &request_index,
+ resp->range_array);
+ if (remain_pages < 0) {
+ /* Send failure response msg. */
+ request_index = 0;
+ } else if (remain_pages) {
+ start_pfn = ALIGN(start_pfn, HA_CHUNK);
+ hv_hot_remove_range(nid, start_pfn, end_pfn, remain_pages,
+ &request_index, resp->range_array, NULL);
+ }
+
+ hv_send_hot_remove_response(resp, request_index, false);
+}
+
#endif
static void hot_remove_req(union dm_msg_info *msg_info)
{
struct hv_dynmem_device *dm = &dm_device;
+ unsigned int numa_node = msg_info->hot_remove.virtual_node;
+ unsigned int page_count = msg_info->hot_remove.page_count;
- /* Add hot remove operation later and send failure response. */
- hv_send_hot_remove_response((struct dm_hot_remove_response *)
+ if (IS_ENABLED(CONFIG_MEMORY_HOTPLUG) && do_hot_add)
+ hv_mem_hot_remove(numa_node, page_count);
+ else
+ hv_send_hot_remove_response((struct dm_hot_remove_response *)
balloon_up_send_buffer, 0, false);
dm->state = DM_INITIALIZED;
--
2.14.5