[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <8d8c706e-6863-4054-b5c0-a37f566f0e7a@intel.com>
Date: Tue, 2 Dec 2025 16:31:16 -0700
From: Dave Jiang <dave.jiang@...el.com>
To: Smita Koralahalli <Smita.KoralahalliChannabasappa@....com>,
linux-cxl@...r.kernel.org, linux-kernel@...r.kernel.org,
nvdimm@...ts.linux.dev, linux-fsdevel@...r.kernel.org,
linux-pm@...r.kernel.org
Cc: Alison Schofield <alison.schofield@...el.com>,
Vishal Verma <vishal.l.verma@...el.com>, Ira Weiny <ira.weiny@...el.com>,
Dan Williams <dan.j.williams@...el.com>,
Jonathan Cameron <jonathan.cameron@...wei.com>,
Yazen Ghannam <yazen.ghannam@....com>, Davidlohr Bueso <dave@...olabs.net>,
Matthew Wilcox <willy@...radead.org>, Jan Kara <jack@...e.cz>,
"Rafael J . Wysocki" <rafael@...nel.org>, Len Brown <len.brown@...el.com>,
Pavel Machek <pavel@...nel.org>, Li Ming <ming.li@...omail.com>,
Jeff Johnson <jeff.johnson@....qualcomm.com>,
Ying Huang <huang.ying.caritas@...il.com>,
Yao Xingtao <yaoxt.fnst@...itsu.com>, Peter Zijlstra <peterz@...radead.org>,
Greg KH <gregkh@...uxfoundation.org>,
Nathan Fontenot <nathan.fontenot@....com>,
Terry Bowman <terry.bowman@....com>, Robert Richter <rrichter@....com>,
Benjamin Cheatham <benjamin.cheatham@....com>,
Zhijian Li <lizhijian@...itsu.com>, Borislav Petkov <bp@...en8.de>,
Ard Biesheuvel <ardb@...nel.org>
Subject: Re: [PATCH v4 1/9] dax/hmem, e820, resource: Defer Soft Reserved
insertion until hmem is ready
On 11/19/25 8:19 PM, Smita Koralahalli wrote:
> From: Dan Williams <dan.j.williams@...el.com>
>
> Insert Soft Reserved memory into a dedicated soft_reserve_resource tree
> instead of the iomem_resource tree at boot. Delay publishing these ranges
> into the iomem hierarchy until ownership is resolved and the HMEM path
> is ready to consume them.
>
> Publishing Soft Reserved ranges into iomem too early conflicts with CXL
> hotplug and prevents region assembly when those ranges overlap CXL
> windows.
>
> Follow up patches will reinsert Soft Reserved ranges into iomem after CXL
> window publication is complete and HMEM is ready to claim the memory. This
> provides a cleaner handoff between EFI-defined memory ranges and CXL
> resource management without trimming or deleting resources later.
>
> Signed-off-by: Dan Williams <dan.j.williams@...el.com>
> Signed-off-by: Smita Koralahalli <Smita.KoralahalliChannabasappa@....com>
With changes requested from Dan,
Reviewed-by: Dave Jiang <dave.jiang@...el.com>
> ---
> arch/x86/kernel/e820.c | 2 +-
> drivers/cxl/acpi.c | 2 +-
> drivers/dax/hmem/device.c | 4 +-
> drivers/dax/hmem/hmem.c | 7 ++-
> include/linux/ioport.h | 13 +++++-
> kernel/resource.c | 92 +++++++++++++++++++++++++++++++++------
> 6 files changed, 100 insertions(+), 20 deletions(-)
>
> diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
> index c3acbd26408b..c32f144f0e4a 100644
> --- a/arch/x86/kernel/e820.c
> +++ b/arch/x86/kernel/e820.c
> @@ -1153,7 +1153,7 @@ void __init e820__reserve_resources_late(void)
> res = e820_res;
> for (i = 0; i < e820_table->nr_entries; i++) {
> if (!res->parent && res->end)
> - insert_resource_expand_to_fit(&iomem_resource, res);
> + insert_resource_expand_to_fit(res);
> res++;
> }
>
> diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c
> index bd2e282ca93a..b37858f797be 100644
> --- a/drivers/cxl/acpi.c
> +++ b/drivers/cxl/acpi.c
> @@ -847,7 +847,7 @@ static int add_cxl_resources(struct resource *cxl_res)
> */
> cxl_set_public_resource(res, new);
>
> - insert_resource_expand_to_fit(&iomem_resource, new);
> + __insert_resource_expand_to_fit(&iomem_resource, new);
>
> next = res->sibling;
> while (next && resource_overlaps(new, next)) {
> diff --git a/drivers/dax/hmem/device.c b/drivers/dax/hmem/device.c
> index f9e1a76a04a9..22732b729017 100644
> --- a/drivers/dax/hmem/device.c
> +++ b/drivers/dax/hmem/device.c
> @@ -83,8 +83,8 @@ static __init int hmem_register_one(struct resource *res, void *data)
>
> static __init int hmem_init(void)
> {
> - walk_iomem_res_desc(IORES_DESC_SOFT_RESERVED,
> - IORESOURCE_MEM, 0, -1, NULL, hmem_register_one);
> + walk_soft_reserve_res_desc(IORES_DESC_SOFT_RESERVED, IORESOURCE_MEM, 0,
> + -1, NULL, hmem_register_one);
> return 0;
> }
>
> diff --git a/drivers/dax/hmem/hmem.c b/drivers/dax/hmem/hmem.c
> index c18451a37e4f..48f4642f4bb8 100644
> --- a/drivers/dax/hmem/hmem.c
> +++ b/drivers/dax/hmem/hmem.c
> @@ -73,11 +73,14 @@ static int hmem_register_device(struct device *host, int target_nid,
> return 0;
> }
>
> - rc = region_intersects(res->start, resource_size(res), IORESOURCE_MEM,
> - IORES_DESC_SOFT_RESERVED);
> + rc = region_intersects_soft_reserve(res->start, resource_size(res),
> + IORESOURCE_MEM,
> + IORES_DESC_SOFT_RESERVED);
> if (rc != REGION_INTERSECTS)
> return 0;
>
> + /* TODO: Add Soft-Reserved memory back to iomem */
> +
> id = memregion_alloc(GFP_KERNEL);
> if (id < 0) {
> dev_err(host, "memregion allocation failure for %pr\n", res);
> diff --git a/include/linux/ioport.h b/include/linux/ioport.h
> index e8b2d6aa4013..e20226870a81 100644
> --- a/include/linux/ioport.h
> +++ b/include/linux/ioport.h
> @@ -232,6 +232,9 @@ struct resource_constraint {
> /* PC/ISA/whatever - the normal PC address spaces: IO and memory */
> extern struct resource ioport_resource;
> extern struct resource iomem_resource;
> +#ifdef CONFIG_EFI_SOFT_RESERVE
> +extern struct resource soft_reserve_resource;
> +#endif
>
> extern struct resource *request_resource_conflict(struct resource *root, struct resource *new);
> extern int request_resource(struct resource *root, struct resource *new);
> @@ -242,7 +245,8 @@ extern void reserve_region_with_split(struct resource *root,
> const char *name);
> extern struct resource *insert_resource_conflict(struct resource *parent, struct resource *new);
> extern int insert_resource(struct resource *parent, struct resource *new);
> -extern void insert_resource_expand_to_fit(struct resource *root, struct resource *new);
> +extern void __insert_resource_expand_to_fit(struct resource *root, struct resource *new);
> +extern void insert_resource_expand_to_fit(struct resource *new);
> extern int remove_resource(struct resource *old);
> extern void arch_remove_reservations(struct resource *avail);
> extern int allocate_resource(struct resource *root, struct resource *new,
> @@ -409,6 +413,13 @@ walk_system_ram_res_rev(u64 start, u64 end, void *arg,
> extern int
> walk_iomem_res_desc(unsigned long desc, unsigned long flags, u64 start, u64 end,
> void *arg, int (*func)(struct resource *, void *));
> +extern int
> +walk_soft_reserve_res_desc(unsigned long desc, unsigned long flags,
> + u64 start, u64 end, void *arg,
> + int (*func)(struct resource *, void *));
> +extern int
> +region_intersects_soft_reserve(resource_size_t start, size_t size,
> + unsigned long flags, unsigned long desc);
>
> struct resource *devm_request_free_mem_region(struct device *dev,
> struct resource *base, unsigned long size);
> diff --git a/kernel/resource.c b/kernel/resource.c
> index b9fa2a4ce089..208eaafcc681 100644
> --- a/kernel/resource.c
> +++ b/kernel/resource.c
> @@ -321,13 +321,14 @@ static bool is_type_match(struct resource *p, unsigned long flags, unsigned long
> }
>
> /**
> - * find_next_iomem_res - Finds the lowest iomem resource that covers part of
> - * [@start..@end].
> + * find_next_res - Finds the lowest resource that covers part of
> + * [@start..@end].
> *
> * If a resource is found, returns 0 and @*res is overwritten with the part
> * of the resource that's within [@start..@end]; if none is found, returns
> * -ENODEV. Returns -EINVAL for invalid parameters.
> *
> + * @parent: resource tree root to search
> * @start: start address of the resource searched for
> * @end: end address of same resource
> * @flags: flags which the resource must have
> @@ -337,9 +338,9 @@ static bool is_type_match(struct resource *p, unsigned long flags, unsigned long
> * The caller must specify @start, @end, @flags, and @desc
> * (which may be IORES_DESC_NONE).
> */
> -static int find_next_iomem_res(resource_size_t start, resource_size_t end,
> - unsigned long flags, unsigned long desc,
> - struct resource *res)
> +static int find_next_res(struct resource *parent, resource_size_t start,
> + resource_size_t end, unsigned long flags,
> + unsigned long desc, struct resource *res)
> {
> struct resource *p;
>
> @@ -351,7 +352,7 @@ static int find_next_iomem_res(resource_size_t start, resource_size_t end,
>
> read_lock(&resource_lock);
>
> - for_each_resource(&iomem_resource, p, false) {
> + for_each_resource(parent, p, false) {
> /* If we passed the resource we are looking for, stop */
> if (p->start > end) {
> p = NULL;
> @@ -382,16 +383,23 @@ static int find_next_iomem_res(resource_size_t start, resource_size_t end,
> return p ? 0 : -ENODEV;
> }
>
> -static int __walk_iomem_res_desc(resource_size_t start, resource_size_t end,
> - unsigned long flags, unsigned long desc,
> - void *arg,
> - int (*func)(struct resource *, void *))
> +static int find_next_iomem_res(resource_size_t start, resource_size_t end,
> + unsigned long flags, unsigned long desc,
> + struct resource *res)
> +{
> + return find_next_res(&iomem_resource, start, end, flags, desc, res);
> +}
> +
> +static int walk_res_desc(struct resource *parent, resource_size_t start,
> + resource_size_t end, unsigned long flags,
> + unsigned long desc, void *arg,
> + int (*func)(struct resource *, void *))
> {
> struct resource res;
> int ret = -EINVAL;
>
> while (start < end &&
> - !find_next_iomem_res(start, end, flags, desc, &res)) {
> + !find_next_res(parent, start, end, flags, desc, &res)) {
> ret = (*func)(&res, arg);
> if (ret)
> break;
> @@ -402,6 +410,15 @@ static int __walk_iomem_res_desc(resource_size_t start, resource_size_t end,
> return ret;
> }
>
> +static int __walk_iomem_res_desc(resource_size_t start, resource_size_t end,
> + unsigned long flags, unsigned long desc,
> + void *arg,
> + int (*func)(struct resource *, void *))
> +{
> + return walk_res_desc(&iomem_resource, start, end, flags, desc, arg, func);
> +}
> +
> +
> /**
> * walk_iomem_res_desc - Walks through iomem resources and calls func()
> * with matching resource ranges.
> @@ -426,6 +443,26 @@ int walk_iomem_res_desc(unsigned long desc, unsigned long flags, u64 start,
> }
> EXPORT_SYMBOL_GPL(walk_iomem_res_desc);
>
> +#ifdef CONFIG_EFI_SOFT_RESERVE
> +struct resource soft_reserve_resource = {
> + .name = "Soft Reserved",
> + .start = 0,
> + .end = -1,
> + .desc = IORES_DESC_SOFT_RESERVED,
> + .flags = IORESOURCE_MEM,
> +};
> +EXPORT_SYMBOL_GPL(soft_reserve_resource);
> +
> +int walk_soft_reserve_res_desc(unsigned long desc, unsigned long flags,
> + u64 start, u64 end, void *arg,
> + int (*func)(struct resource *, void *))
> +{
> + return walk_res_desc(&soft_reserve_resource, start, end, flags, desc,
> + arg, func);
> +}
> +EXPORT_SYMBOL_GPL(walk_soft_reserve_res_desc);
> +#endif
> +
> /*
> * This function calls the @func callback against all memory ranges of type
> * System RAM which are marked as IORESOURCE_SYSTEM_RAM and IORESOUCE_BUSY.
> @@ -648,6 +685,22 @@ int region_intersects(resource_size_t start, size_t size, unsigned long flags,
> }
> EXPORT_SYMBOL_GPL(region_intersects);
>
> +#ifdef CONFIG_EFI_SOFT_RESERVE
> +int region_intersects_soft_reserve(resource_size_t start, size_t size,
> + unsigned long flags, unsigned long desc)
> +{
> + int ret;
> +
> + read_lock(&resource_lock);
> + ret = __region_intersects(&soft_reserve_resource, start, size, flags,
> + desc);
> + read_unlock(&resource_lock);
> +
> + return ret;
> +}
> +EXPORT_SYMBOL_GPL(region_intersects_soft_reserve);
> +#endif
> +
> void __weak arch_remove_reservations(struct resource *avail)
> {
> }
> @@ -966,7 +1019,7 @@ EXPORT_SYMBOL_GPL(insert_resource);
> * Insert a resource into the resource tree, possibly expanding it in order
> * to make it encompass any conflicting resources.
> */
> -void insert_resource_expand_to_fit(struct resource *root, struct resource *new)
> +void __insert_resource_expand_to_fit(struct resource *root, struct resource *new)
> {
> if (new->parent)
> return;
> @@ -997,7 +1050,20 @@ void insert_resource_expand_to_fit(struct resource *root, struct resource *new)
> * to use this interface. The former are built-in and only the latter,
> * CXL, is a module.
> */
> -EXPORT_SYMBOL_NS_GPL(insert_resource_expand_to_fit, "CXL");
> +EXPORT_SYMBOL_NS_GPL(__insert_resource_expand_to_fit, "CXL");
> +
> +void insert_resource_expand_to_fit(struct resource *new)
> +{
> + struct resource *root = &iomem_resource;
> +
> +#ifdef CONFIG_EFI_SOFT_RESERVE
> + if (new->desc == IORES_DESC_SOFT_RESERVED)
> + root = &soft_reserve_resource;
> +#endif
> +
> + __insert_resource_expand_to_fit(root, new);
> +}
> +EXPORT_SYMBOL_GPL(insert_resource_expand_to_fit);
>
> /**
> * remove_resource - Remove a resource in the resource tree
Powered by blists - more mailing lists