[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250511025840.2410154-1-joshua.hahnjy@gmail.com>
Date: Sat, 10 May 2025 19:58:39 -0700
From: Joshua Hahn <joshua.hahnjy@...il.com>
To: Joshua Hahn <joshua.hahnjy@...il.com>,
Andrew Morton <akpm@...ux-foundation.org>
Cc: "Huang, Ying" <ying.huang@...ux.alibaba.com>,
gourry@...rry.net,
honggyu.kim@...com,
yunjeong.mun@...com,
gregkh@...uxfoundation.org,
rafael@...nel.org,
lenb@...nel.org,
dan.j.williams@...el.com,
Jonathan.Cameron@...wei.com,
dave.jiang@...el.com,
horen.chuang@...ux.dev,
hannes@...xchg.org,
osalvador@...e.de,
linux-kernel@...r.kernel.org,
linux-acpi@...r.kernel.org,
linux-mm@...ck.org,
kernel-team@...a.com
Subject: Re: [PATCH v8] mm/mempolicy: Weighted Interleave Auto-tuning
Hello Andrew,
I was hoping to fold this fixlet in with the patch this belongs to. It includes
some wordsmithing changes, some code simplification/cleanups, and makes sure
that the code behavior matches that of the ABI I described. I've kept the
original message below as well, where Ying suggested the changes present in
this fixlet.
Please let me know if this fixlet is too big, and you would rather prefer a
new version instead. Thank you as always for your patience and support!
Joshua
diff --git a/Documentation/ABI/testing/sysfs-kernel-mm-mempolicy-weighted-interleave b/Documentation/ABI/testing/sysfs-kernel-mm-mempolicy-weighted-interleave
index ec13382c606f..649c0e9b895c 100644
--- a/Documentation/ABI/testing/sysfs-kernel-mm-mempolicy-weighted-interleave
+++ b/Documentation/ABI/testing/sysfs-kernel-mm-mempolicy-weighted-interleave
@@ -24,7 +24,7 @@ Description: Weight configuration interface for nodeN
empty string, ...) will return -EINVAL.
Changing the weight to a valid value will automatically
- update the system to manual mode as well.
+ switch the system to manual mode as well.
What: /sys/kernel/mm/mempolicy/weighted_interleave/auto
Date: May 2025
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 3e8da8ba1146..0fe96f3ab3ef 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -57,15 +57,6 @@ struct mempolicy {
} w;
};
-/*
- * A null weighted_interleave_state is interpted as having .mode = "auto",
- * and .iw_table is interpreted as an array of 1s with length nr_node_ids.
- */
-struct weighted_interleave_state {
- bool mode_auto;
- u8 iw_table[];
-};
-
/*
* Support for managing mempolicy data objects (clone, copy, destroy)
* The default fast path of a NULL MPOL_DEFAULT policy is always inlined.
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index f542691b7123..0624d735a2e7 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -148,6 +148,14 @@ static struct mempolicy preferred_node_policy[MAX_NUMNODES];
*/
static const int weightiness = 32;
+/*
+ * A null weighted_interleave_state is interpreted as having .mode="auto",
+ * and .iw_table is interpreted as an array of 1s with length nr_node_ids.
+ */
+struct weighted_interleave_state {
+ bool mode_auto;
+ u8 iw_table[];
+};
static struct weighted_interleave_state __rcu *wi_state;
static unsigned int *node_bw_table;
@@ -3561,9 +3569,8 @@ static ssize_t node_store(struct kobject *kobj, struct kobj_attribute *attr,
int i;
node_attr = container_of(attr, struct iw_node_attr, kobj_attr);
- if (count == 0 || sysfs_streq(buf, ""))
- weight = 0;
- else if (kstrtou8(buf, 0, &weight) || weight == 0)
+ if (count == 0 || sysfs_streq(buf, "") ||
+ kstrtou8(buf, 0, &weight) || weight == 0)
return -EINVAL;
new_wi_state = kzalloc(struct_size(new_wi_state, iw_table, nr_node_ids),
@@ -3630,9 +3637,15 @@ static ssize_t weighted_interleave_auto_store(struct kobject *kobj,
if (!input) {
old_wi_state = rcu_dereference_protected(wi_state,
lockdep_is_held(&wi_state_lock));
- if (old_wi_state)
- memcpy(new_wi_state->iw_table, old_wi_state->iw_table,
- nr_node_ids * sizeof(u8));
+ if (!old_wi_state)
+ goto update_wi_state;
+ if (input == old_wi_state->mode_auto) {
+ mutex_unlock(&wi_state_lock);
+ return count;
+ }
+
+ memcpy(new_wi_state->iw_table, old_wi_state->iw_table,
+ nr_node_ids * sizeof(u8));
goto update_wi_state;
}
@@ -3707,8 +3720,12 @@ static void wi_state_free(void)
kfree(&wi_group->wi_kobj);
}
+static struct kobj_attribute wi_auto_attr =
+ __ATTR(auto, 0664, weighted_interleave_auto_show,
+ weighted_interleave_auto_store);
+
static void wi_cleanup(void) {
- sysfs_remove_file(&wi_group->wi_kobj, &wi_group->auto_kobj_attr.attr);
+ sysfs_remove_file(&wi_group->wi_kobj, &wi_auto_attr.attr);
sysfs_wi_node_delete_all();
wi_state_free();
}
@@ -3798,10 +3815,6 @@ static int wi_node_notifier(struct notifier_block *nb,
return NOTIFY_OK;
}
-static struct kobj_attribute wi_auto_attr =
- __ATTR(auto, 0664, weighted_interleave_auto_show,
- weighted_interleave_auto_store);
-
static int __init add_weighted_interleave_group(struct kobject *mempolicy_kobj)
{
int nid, err;
On Sat, 10 May 2025 11:51:50 -0700 Joshua Hahn <joshua.hahnjy@...il.com> wrote:
> On Sat, 10 May 2025 13:25:32 +0800 "Huang, Ying" <ying.huang@...ux.alibaba.com> wrote:
>
> Hi Ying,
> Thank you for reviewing my patch, as always!
>
> > Hi, Joshua,
> >
> > Thank you for updated version! And sorry for late reply.
> >
> > Joshua Hahn <joshua.hahnjy@...il.com> writes:
>
> [...snip...]
>
> > > diff --git a/Documentation/ABI/testing/sysfs-kernel-mm-mempolicy-weighted-interleave b/Documentation/ABI/testing/sysfs-kernel-mm-mempolicy-weighted-interleave
> > > index 0b7972de04e9..ec13382c606f 100644
> > > --- a/Documentation/ABI/testing/sysfs-kernel-mm-mempolicy-weighted-interleave
> > > +++ b/Documentation/ABI/testing/sysfs-kernel-mm-mempolicy-weighted-interleave
> > > @@ -20,6 +20,35 @@ Description: Weight configuration interface for nodeN
> > > Minimum weight: 1
> > > Maximum weight: 255
> > >
> > > - Writing an empty string or `0` will reset the weight to the
> > > - system default. The system default may be set by the kernel
> > > - or drivers at boot or during hotplug events.
> > > + Writing invalid values (i.e. any values not in [1,255],
> > > + empty string, ...) will return -EINVAL.
> > > +
> > > + Changing the weight to a valid value will automatically
> > > + update the system to manual mode as well.
> >
> > s/update/switch/ ?
> >
> > But my English is poor, please keep your version if you think that it's
> > better.
>
> I have no particular preference here, whatever will make it easiest for the
> users to understand what is happening. I'll take your suggestion!
>
> [...snip...]
>
> > > +/*
> > > + * A null weighted_interleave_state is interpted as having .mode = "auto",
> > > + * and .iw_table is interpreted as an array of 1s with length nr_node_ids.
> > > + */
> >
> > Better to move the comments to above "wi_state" definition?
> >
> > > +struct weighted_interleave_state {
> > > + bool mode_auto;
> > > + u8 iw_table[];
> > > +};
> > > +
> >
> > Why do you put the type definition in mempolicy.h instead of
> > mempolicy.c? I don't find other users except mempolicy.c.
>
> Good point, I'll move the definition to mempolicy.c and move the comment
> to the wi_state definition as well.
>
> [...snip...]
>
> > > @@ -3450,31 +3555,104 @@ static ssize_t node_show(struct kobject *kobj, struct kobj_attribute *attr,
> > > static ssize_t node_store(struct kobject *kobj, struct kobj_attribute *attr,
> > > const char *buf, size_t count)
> > > {
> > > + struct weighted_interleave_state *new_wi_state, *old_wi_state = NULL;
> > > struct iw_node_attr *node_attr;
> > > - u8 *new;
> > > - u8 *old;
> > > u8 weight = 0;
> > > + int i;
> > >
> > > node_attr = container_of(attr, struct iw_node_attr, kobj_attr);
> > > if (count == 0 || sysfs_streq(buf, ""))
> > > weight = 0;
> >
> > According to revised ABI, we should return -EINVAL here?
>
> Great catch, I completely ignored the ABI description that I wrote...
> I'll go ahead and just return -EINVAL here!
>
> [...snip...]
>
> > > +static ssize_t weighted_interleave_auto_store(struct kobject *kobj,
> > > + struct kobj_attribute *attr, const char *buf, size_t count)
> > > +{
> > > + struct weighted_interleave_state *new_wi_state, *old_wi_state = NULL;
> > > + unsigned int *bw;
> > > + bool input;
> > > + int i;
> > > +
> > > + if (kstrtobool(buf, &input))
> > > + return -EINVAL;
> > > +
> > > + new_wi_state = kzalloc(struct_size(new_wi_state, iw_table, nr_node_ids),
> > > + GFP_KERNEL);
> > > + if (!new_wi_state)
> > > + return -ENOMEM;
> > > + for (i = 0; i < nr_node_ids; i++)
> > > + new_wi_state->iw_table[i] = 1;
> > > +
> > > + mutex_lock(&wi_state_lock);
> > > + if (!input) {
> >
> > If input == old_wi_state->mode_auto, we can return directly?
>
> Yes, that makes sense to me.
>
> > > static void wi_cleanup(void) {
> > > + sysfs_remove_file(&wi_group->wi_kobj, &wi_group->auto_kobj_attr.attr);
> >
> > Why not just
> >
> > sysfs_remove_file(&wi_group->wi_kobj, &wi_auto_attr.attr);
> >
> > ?
>
> Also makes sense!!
>
> > ---
> > Best Regards,
> > Huang, Ying
>
> Thank you for your great feedback Ying, I'll make changes based on
> your suggestions and shortly send up a v9. I hope you have a great day!
> Joshua
>
Powered by blists - more mailing lists