[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <938a7948-7882-41a3-926b-3d2a8d07620d@kernel.org>
Date: Mon, 1 Dec 2025 19:36:00 +0100
From: "David Hildenbrand (Red Hat)" <david@...nel.org>
To: "Pratik R. Sampat" <prsampat@....com>, linux-mm@...ck.org,
linux-coco@...ts.linux.dev, linux-efi@...r.kernel.org, x86@...nel.org,
linux-kernel@...r.kernel.org
Cc: tglx@...utronix.de, mingo@...hat.com, bp@...en8.de,
dave.hansen@...ux.intel.com, kas@...nel.org, ardb@...nel.org,
akpm@...ux-foundation.org, osalvador@...e.de, thomas.lendacky@....com,
michael.roth@....com
Subject: Re: [RFC PATCH 2/4] mm: Add support for unaccepted memory hotplug
On 12/1/25 18:21, Pratik R. Sampat wrote:
>
>
> On 11/28/25 3:32 AM, David Hildenbrand (Red Hat) wrote:
>> On 11/25/25 18:57, Pratik R. Sampat wrote:
>>> The unaccepted memory structure currently only supports accepting memory
>>> present at boot time. The unaccepted table uses a fixed-size bitmap
>>> reserved in memblock based on the initial memory layout, preventing
>>> dynamic addition of memory ranges after boot. This causes guest
>>> termination when memory is hot-added in a secure virtual machine due to
>>> accessing pages that have not transitioned to private before use.
>>>
>>> Extend the unaccepted memory framework to handle hotplugged memory by
>>> dynamically managing the unaccepted bitmap. Allocate a new bitmap when
>>> hotplugged ranges exceed the reserved bitmap capacity and switch to
>>> kernel-managed allocation.
>>>
>>> Hotplugged memory also follows the same acceptance policy using the
>>> accept_memory=[eager|lazy] kernel parameter to accept memory either
>>> up-front when added or before first use.
>>>
>>> Signed-off-by: Pratik R. Sampat <prsampat@....com>
>>> ---
>>> arch/x86/boot/compressed/efi.h | 1 +
>>> .../firmware/efi/libstub/unaccepted_memory.c | 1 +
>>> drivers/firmware/efi/unaccepted_memory.c | 83 +++++++++++++++++++
>>> include/linux/efi.h | 1 +
>>> include/linux/mm.h | 11 +++
>>> mm/memory_hotplug.c | 7 ++
>>> mm/page_alloc.c | 2 +
>>> 7 files changed, 106 insertions(+)
>>>
>>> diff --git a/arch/x86/boot/compressed/efi.h b/arch/x86/boot/compressed/efi.h
>>> index 4f7027f33def..a220a1966cae 100644
>>> --- a/arch/x86/boot/compressed/efi.h
>>> +++ b/arch/x86/boot/compressed/efi.h
>>> @@ -102,6 +102,7 @@ struct efi_unaccepted_memory {
>>> u32 unit_size;
>>> u64 phys_base;
>>> u64 size;
>>> + bool mem_reserved;
>>> unsigned long *bitmap;
>>> };
>>> diff --git a/drivers/firmware/efi/libstub/unaccepted_memory.c b/drivers/firmware/efi/libstub/unaccepted_memory.c
>>> index c1370fc14555..b16bd61c12bf 100644
>>> --- a/drivers/firmware/efi/libstub/unaccepted_memory.c
>>> +++ b/drivers/firmware/efi/libstub/unaccepted_memory.c
>>> @@ -83,6 +83,7 @@ efi_status_t allocate_unaccepted_bitmap(__u32 nr_desc,
>>> unaccepted_table->unit_size = EFI_UNACCEPTED_UNIT_SIZE;
>>> unaccepted_table->phys_base = unaccepted_start;
>>> unaccepted_table->size = bitmap_size;
>>> + unaccepted_table->mem_reserved = true;
>>> memset(unaccepted_table->bitmap, 0, bitmap_size);
>>> status = efi_bs_call(install_configuration_table,
>>> diff --git a/drivers/firmware/efi/unaccepted_memory.c b/drivers/firmware/efi/unaccepted_memory.c
>>> index 4479aad258f8..8537812346e2 100644
>>> --- a/drivers/firmware/efi/unaccepted_memory.c
>>> +++ b/drivers/firmware/efi/unaccepted_memory.c
>>> @@ -218,6 +218,89 @@ bool range_contains_unaccepted_memory(phys_addr_t start, unsigned long size)
>>> return ret;
>>> }
>>> +static int extend_unaccepted_bitmap(phys_addr_t mem_range_start,
>>> + unsigned long mem_range_size)
>>> +{
>>> + struct efi_unaccepted_memory *unacc_tbl;
>>> + unsigned long *old_bitmap, *new_bitmap;
>>> + phys_addr_t start, end, mem_range_end;
>>> + u64 phys_base, size, unit_size;
>>> + unsigned long flags;
>>> +
>>> + unacc_tbl = efi_get_unaccepted_table();
>>> + if (!unacc_tbl || !unacc_tbl->unit_size)
>>> + return -EIO;
>>> +
>>> + unit_size = unacc_tbl->unit_size;
>>> + phys_base = unacc_tbl->phys_base;
>>> +
>>> + mem_range_end = round_up(mem_range_start + mem_range_size, unit_size);
>>> + size = DIV_ROUND_UP(mem_range_end - phys_base, unit_size * BITS_PER_BYTE);
>>> +
>>> + /* Translate to offsets from the beginning of the bitmap */
>>> + start = mem_range_start - phys_base;
>>> + end = mem_range_end - phys_base;
>>> +
>>> + old_bitmap = efi_get_unaccepted_bitmap();
>>> + if (!old_bitmap)
>>> + return -EIO;
>>> +
>>> + /* If the bitmap is already large enough, just set the bits */
>>> + if (unacc_tbl->size >= size) {
>>> + spin_lock_irqsave(&unaccepted_memory_lock, flags);
>>> + bitmap_set(old_bitmap, start / unit_size, (end - start) / unit_size);
>>> + spin_unlock_irqrestore(&unaccepted_memory_lock, flags);
>>> +
>>> + return 0;
>>> + }
>>> +
>>> + /* Reserved memblocks cannot be extended so allocate a new bitmap */
>>> + if (unacc_tbl->mem_reserved) {
>>> + new_bitmap = kzalloc(size, GFP_KERNEL);
>>> + if (!new_bitmap)
>>> + return -ENOMEM;
>>> +
>>> + spin_lock_irqsave(&unaccepted_memory_lock, flags);
>>> + memcpy(new_bitmap, old_bitmap, unacc_tbl->size);
>>> + unacc_tbl->mem_reserved = false;
>>> + free_reserved_area(old_bitmap, old_bitmap + unacc_tbl->size, -1, NULL);
>>> + spin_unlock_irqrestore(&unaccepted_memory_lock, flags);
>>> + } else {
>>> + new_bitmap = krealloc(old_bitmap, size, GFP_KERNEL);
>>> + if (!new_bitmap)
>>> + return -ENOMEM;
>>> +
>>> + /* Zero the bitmap from the range it was extended from */
>>> + memset(new_bitmap + unacc_tbl->size, 0, size - unacc_tbl->size);
>>> + }
>>> +
>>> + bitmap_set(new_bitmap, start / unit_size, (end - start) / unit_size);
>>> +
>>> + spin_lock_irqsave(&unaccepted_memory_lock, flags);
>>> + unacc_tbl->size = size;
>>> + unacc_tbl->bitmap = (unsigned long *)__pa(new_bitmap);
>>> + spin_unlock_irqrestore(&unaccepted_memory_lock, flags);
>>> +
>>> + return 0;
>>> +}
>>> +
>>> +int accept_hotplug_memory(phys_addr_t mem_range_start, unsigned long mem_range_size)
>>> +{
>>> + int ret;
>>> +
>>> + if (!IS_ENABLED(CONFIG_UNACCEPTED_MEMORY))
>>> + return 0;
>>> +
>>> + ret = extend_unaccepted_bitmap(mem_range_start, mem_range_size);
>>> + if (ret)
>>> + return ret;
>>> +
>>> + if (!mm_lazy_accept_enabled())
>>> + accept_memory(mem_range_start, mem_range_size);
>>> +
>>> + return 0;
>>> +}
>>> +
>>> #ifdef CONFIG_PROC_VMCORE
>>> static bool unaccepted_memory_vmcore_pfn_is_ram(struct vmcore_cb *cb,
>>> unsigned long pfn)
>>> diff --git a/include/linux/efi.h b/include/linux/efi.h
>>> index a74b393c54d8..1021eb78388f 100644
>>> --- a/include/linux/efi.h
>>> +++ b/include/linux/efi.h
>>> @@ -545,6 +545,7 @@ struct efi_unaccepted_memory {
>>> u32 unit_size;
>>> u64 phys_base;
>>> u64 size;
>>> + bool mem_reserved;
>>> unsigned long *bitmap;
>>> };
>>> diff --git a/include/linux/mm.h b/include/linux/mm.h
>>> index 1ae97a0b8ec7..bb43876e6c47 100644
>>> --- a/include/linux/mm.h
>>> +++ b/include/linux/mm.h
>>> @@ -4077,6 +4077,9 @@ int set_anon_vma_name(unsigned long addr, unsigned long size,
>>> bool range_contains_unaccepted_memory(phys_addr_t start, unsigned long size);
>>> void accept_memory(phys_addr_t start, unsigned long size);
>>> +int accept_hotplug_memory(phys_addr_t mem_range_start,
>>> + unsigned long mem_range_size);
>>> +bool mm_lazy_accept_enabled(void);
>>> #else
>>> @@ -4090,6 +4093,14 @@ static inline void accept_memory(phys_addr_t start, unsigned long size)
>>> {
>>> }
>>> +static inline int accept_hotplug_memory(phys_addr_t mem_range_start,
>>> + unsigned long mem_range_size)
>>> +{
>>> + return 0;
>>> +}
>>> +
>>> +static inline bool mm_lazy_accept_enabled(void) { return false; }
>>> +
>>> #endif
>>> static inline bool pfn_is_unaccepted_memory(unsigned long pfn)
>>> diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
>>> index 74318c787715..bf8086682b66 100644
>>> --- a/mm/memory_hotplug.c
>>> +++ b/mm/memory_hotplug.c
>>> @@ -1581,6 +1581,13 @@ int add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags)
>>> if (!strcmp(res->name, "System RAM"))
>>> firmware_map_add_hotplug(start, start + size, "System RAM");
>>> + ret = accept_hotplug_memory(start, size);
>>
>> What makes this special that we have to have "hotplug_memory" as part of the name?
>>
>> Staring at the helper itself, there isn't anything really hotplug specific happening in there except extending the bitmap, maybe?
>>
>
> Right, we are extending the original bitmap and initializing a structure
> to track state as well. I added the hotplug_memory keyword without
> much thought, since I didn't see anyone else attempting to extend these
> structures.
>
> That said, I agree the name is awkward. I could either come up with
> something different, or we could eliminate the parent function
> entirely and call extend_unaccepted_bitmap() + accept_memory() directly
> from add_memory_resource(). Similarly, we could do the same to
> s/unaccept_hotplug_memory/unaccept_memory too.
BTW, can't we allocate the bitmap based on maximum memory in the system
as indicated by e820 (which includes to-maybe-be-hotplugged-ranges) and
not do this allocation during hotplug events?
If you search for max_possible_pfn / max_pfn I think you should find
what I mean.
Then it would be a simple accept_memory().
--
Cheers
David
Powered by blists - more mailing lists