[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <aOYIwEGgrjpNMGKD@li-2b55cdcc-350b-11b2-a85c-a78bff51fc11.ibm.com>
Date: Wed, 8 Oct 2025 08:46:24 +0200
From: Sumanth Korikkar <sumanthk@...ux.ibm.com>
To: David Hildenbrand <david@...hat.com>
Cc: Andrew Morton <akpm@...ux-foundation.org>, linux-mm <linux-mm@...ck.org>,
LKML <linux-kernel@...r.kernel.org>,
linux-s390 <linux-s390@...r.kernel.org>,
Gerald Schaefer <gerald.schaefer@...ux.ibm.com>,
Heiko Carstens <hca@...ux.ibm.com>, Vasily Gorbik <gor@...ux.ibm.com>,
Alexander Gordeev <agordeev@...ux.ibm.com>
Subject: Re: [PATCH 2/4] s390/sclp: Add support for dynamic (de)configuration
of memory
On Tue, Oct 07, 2025 at 10:07:43PM +0200, David Hildenbrand wrote:
> [...]
>
> > ---
> > drivers/s390/char/sclp_mem.c | 291 +++++++++++++++++++++++++++++------
> > 1 file changed, 241 insertions(+), 50 deletions(-)
> >
> > diff --git a/drivers/s390/char/sclp_mem.c b/drivers/s390/char/sclp_mem.c
> > index 27f49f5fd358..802439230294 100644
> > --- a/drivers/s390/char/sclp_mem.c
> > +++ b/drivers/s390/char/sclp_mem.c
> > @@ -9,9 +9,12 @@
> > #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
> > #include <linux/cpufeature.h>
> > +#include <linux/container_of.h>
> > #include <linux/err.h>
> > #include <linux/errno.h>
> > #include <linux/init.h>
> > +#include <linux/kobject.h>
> > +#include <linux/kstrtox.h>
> > #include <linux/memory.h>
> > #include <linux/memory_hotplug.h>
> > #include <linux/mm.h>
> > @@ -27,7 +30,6 @@
> > #define SCLP_CMDW_ASSIGN_STORAGE 0x000d0001
> > #define SCLP_CMDW_UNASSIGN_STORAGE 0x000c0001
> > -static DEFINE_MUTEX(sclp_mem_mutex);
> > static LIST_HEAD(sclp_mem_list);
> > static u8 sclp_max_storage_id;
> > static DECLARE_BITMAP(sclp_storage_ids, 256);
> > @@ -38,6 +40,18 @@ struct memory_increment {
> > int standby;
> > };
> > +struct mblock {
> > + struct kobject kobj;
> > + unsigned int id;
> > + unsigned int memmap_on_memory;
> > + unsigned int config;
> > +};
> > +
> > +struct memory_block_arg {
> > + struct mblock *mblocks;
> > + struct kset *kset;
> > +};
>
> I would avoid using "memory_block_arg" as it reminds of core mm "struct memory_block".
>
> Similarly, I'd not call this "mblock".
>
> What about incorporating the "sclp" side of things?
>
> "struct sclp_mem" / "struct sclp_mem_arg"
>
> Nicely fits "sclp_mem.c" ;)
>
> Something like that might be better.
Sure. I will change it. Thanks
> > +
> > struct assign_storage_sccb {
> > struct sccb_header header;
> > u16 rn;
> > @@ -185,15 +199,11 @@ static int sclp_mem_notifier(struct notifier_block *nb,
> > {
> > unsigned long start, size;
> > struct memory_notify *arg;
> > - unsigned char id;
> > int rc = 0;
> > arg = data;
> > start = arg->start_pfn << PAGE_SHIFT;
> > size = arg->nr_pages << PAGE_SHIFT;
> > - mutex_lock(&sclp_mem_mutex);
> > - for_each_clear_bit(id, sclp_storage_ids, sclp_max_storage_id + 1)
> > - sclp_attach_storage(id);
> > switch (action) {
> > case MEM_GOING_OFFLINE:
> > /*
> > @@ -204,45 +214,201 @@ static int sclp_mem_notifier(struct notifier_block *nb,
> > if (contains_standby_increment(start, start + size))
> > rc = -EPERM;
> > break;
>
> Is there any reason this notifier is still needed? I'd assume we can just allow
> for offlining + re-onlining as we please now.
>
> In fact, I'd assume we can get rid of the notifier entirely now?
I was initially uncertain about the contains_standby_increment() use case
and didn't change it here. However, after testing with the
contains_standby_increment() checks removed, I observed that the common memory
hotplug code already prevents offlining a memory block that contains
holes. This ensures safety without relying on these checks.
c5e79ef561b0 ("mm/memory_hotplug.c: don't allow to online/offline memory blocks with holes")
i.e. #cp define storage 3504M standby 2148M
This leads to a configuration where memory block 27 contains both
assigned and standby increments.
But offlining it will not succeed:
chmem -d 0x00000000d8000000-0x00000000dfffffff
chmem: Memory Block 27 (0x00000000d8000000-0x00000000dfffffff) disable
failed: Invalid argument
Hence, I will remove it. Thanks.
> > - case MEM_PREPARE_ONLINE:
> > - /*
> > - * Access the altmap_start_pfn and altmap_nr_pages fields
> > - * within the struct memory_notify specifically when dealing
> > - * with only MEM_PREPARE_ONLINE/MEM_FINISH_OFFLINE notifiers.
> > - *
> > - * When altmap is in use, take the specified memory range
> > - * online, which includes the altmap.
> > - */
> > - if (arg->altmap_nr_pages) {
> > - start = PFN_PHYS(arg->altmap_start_pfn);
> > - size += PFN_PHYS(arg->altmap_nr_pages);
> > - }
> > - rc = sclp_mem_change_state(start, size, 1);
> > - if (rc || !arg->altmap_nr_pages)
> > - break;
> > - /*
> > - * Set CMMA state to nodat here, since the struct page memory
> > - * at the beginning of the memory block will not go through the
> > - * buddy allocator later.
> > - */
> > - __arch_set_page_nodat((void *)__va(start), arg->altmap_nr_pages);
> > + default:
> > break;
> > - case MEM_FINISH_OFFLINE:
> > + }
> > + return rc ? NOTIFY_BAD : NOTIFY_OK;
> > +}
> > +
> > +static ssize_t config_mblock_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
> > +{
> > + struct mblock *mblock = container_of(kobj, struct mblock, kobj);
> > +
> > + return sysfs_emit(buf, "%u\n", READ_ONCE(mblock->config));
> > +}
> > +
> > +static ssize_t config_mblock_store(struct kobject *kobj, struct kobj_attribute *attr,
> > + const char *buf, size_t count)
> > +{
> > + unsigned long long addr, block_size;
>
> "unsigned long" should be sufficient I'm sure :)
Left over. I will do so.
> > + struct memory_block *mem;
> > + struct mblock *mblock;
> > + unsigned char id;
> > + bool value;
> > + int rc;
> > +
> > + rc = kstrtobool(buf, &value);
> > + if (rc)
> > + return rc;
> > + mblock = container_of(kobj, struct mblock, kobj);
> > + block_size = memory_block_size_bytes();
> > + addr = mblock->id * block_size;
> > + /*
> > + * Hold device_hotplug_lock when adding/removing memory blocks.
> > + * Additionally, also protect calls to find_memory_block() and
> > + * sclp_attach_storage().
> > + */
> > + rc = lock_device_hotplug_sysfs();
> > + if (rc)
> > + goto out;
> > + for_each_clear_bit(id, sclp_storage_ids, sclp_max_storage_id + 1)
> > + sclp_attach_storage(id);
> > + if (value) {
> > + if (mblock->config)
> > + goto out_unlock;
> > + rc = sclp_mem_change_state(addr, block_size, 1);
> > + if (rc)
> > + goto out_unlock;
> > /*
> > - * When altmap is in use, take the specified memory range
> > - * offline, which includes the altmap.
> > + * Set entire memory block CMMA state to nodat. Later, when
> > + * page tables pages are allocated via __add_memory(), those
> > + * regions are marked __arch_set_page_dat().
> > */
> > - if (arg->altmap_nr_pages) {
> > - start = PFN_PHYS(arg->altmap_start_pfn);
> > - size += PFN_PHYS(arg->altmap_nr_pages);
> > + __arch_set_page_nodat((void *)__va(addr), block_size >> PAGE_SHIFT);
> > + rc = __add_memory(0, addr, block_size,
> > + mblock->memmap_on_memory ?
> > + MHP_MEMMAP_ON_MEMORY | MHP_OFFLINE_INACCESSIBLE : MHP_NONE);
> > + if (rc)
> > + goto out_unlock;
>
> Do we have to undo the state change?
Intention was to keep error handling simple. In case of failure in
add_memory(), we would have state set to 1 (not given back). But,
subsequent configuration request for that block will not have an impact.
...
> > +static int create_mblock(struct mblock *mblock, struct kset *kset,
> > + unsigned int id, bool config, bool memmap_on_memory)
> > +{
> > + int rc;
> > +
> > + mblock->memmap_on_memory = memmap_on_memory;
> > + mblock->config = config;
> > + mblock->id = id;
> > + kobject_init(&mblock->kobj, &ktype);
> > + rc = kobject_add(&mblock->kobj, &kset->kobj, "memory%d", id);
> > + if (rc)
> > + return rc;
> > + rc = sysfs_create_group(&mblock->kobj, &mblock_attr_group);
> > + if (rc)
> > + kobject_put(&mblock->kobj);
> > + return rc;
> > +}
> > +
> > +/*
> > + * Create /sys/firmware/memory/memoryX for boottime configured online memory
> > + * blocks
> > + */
> > +static int create_online_mblock(struct memory_block *mem, void *argument)
>
> "online" is confusing. It's "initial" / "configured". Same applies to the other functions
> that mention "online".
Sure. I will change it.
> > +{
> > + struct memory_block_arg *arg;
> > + struct mblock *mblocks;
> > + struct kset *kset;
> > + unsigned int id;
> > +
> > + id = mem->dev.id;
> > + arg = (struct memory_block_arg *)argument;
> > + mblocks = arg->mblocks;
> > + kset = arg->kset;
> > + return create_mblock(&mblocks[id], kset, id, true, false);
> > +}
> > +
> > +static int __init create_initial_online_mblocks(struct mblock *mblocks, struct kset *kset)
> > +{
> > + struct memory_block_arg arg;
> > +
> > + arg.mblocks = mblocks;
> > + arg.kset = kset;
> > + return for_each_memory_block(&arg, create_online_mblock);
> > +}
> > +
> > +static struct mblock * __init allocate_mblocks(void)
> > +{
> > + u64 max_mblocks;
>
> Nit: why an u64? The block ids are "unsigned int id;"
Sure. I will correct it.
> > + u64 block_size;
> > +
> > + block_size = memory_block_size_bytes();
> > + max_mblocks = roundup(sclp.rnmax * sclp.rzm, block_size) / block_size;
> > + return kcalloc(max_mblocks, sizeof(struct mblock), GFP_KERNEL);
>
>
> I think you should structure the code a bit differently, not splitting
> the function up into tiny helpers.
>
> static int __init init_sclp_mem(void)
> {
> const u64 block_size = memory_block_size_bytes();
> const u64 max_mblocks = roundup(sclp.rnmax * sclp.rzm, block_size) / block_size;
> struct sclp_mem_arg arg;
> struct kset *kset;
> int rc;
>
> /* We'll allocate memory for all blocks ahead of time. */
> sclp_mem = kcalloc(max_mblocks, sizeof(struct mblock), GFP_KERNEL);
> if (!sclp_mem)
> return -ENOMEM;
>
> kset = kset_create_and_add("memory", NULL, firmware_kobj);
> if (!kset)
> return -ENOMEM;
>
> /* Initial memory is in the "configured" state already. */
> arg.sclp_mem = sclp_mem;
> arg.kset = kset;
> rc = for_each_memory_block(&arg, create_configured_sclp_mem);
> if (rc)
> return rc;
>
> /* Standby memory is "deconfigured". */
> return create_standby_sclp_mem(sclp_mem, kset);
> }
>
> Should still be quite readable.
Then, I'll make use of it.
...
> > -static int __init sclp_detect_standby_memory(void)
> > +static int __init sclp_setup_memory(void)
> > {
> > struct read_storage_sccb *sccb;
> > int i, id, assigned, rc;
> > + struct mblock *mblocks;
> > + struct kset *kset;
> > /* No standby memory in kdump mode */
> > if (oldmem_data.start)
>
> Wouldn't we still want to create the ones for initial memory at least?
Intention was the following:
configuration and deconfiguration of memory with optional
memmap-on-memory is mostly needed for only standby memory.
If standby memory is absent or sclp is unavailable, we continue using
the previous behavior (only software offline/online), since the sclp
memory notifier was not registered in that case before either.
Thank you David
Powered by blists - more mailing lists