[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <877com68zm.fsf@nvdebian.thelocal>
Date: Tue, 19 Sep 2023 15:13:18 +1000
From: Alistair Popple <apopple@...dia.com>
To: Huang Ying <ying.huang@...el.com>
Cc: Andrew Morton <akpm@...ux-foundation.org>, linux-mm@...ck.org,
linux-kernel@...r.kernel.org, Bharata B Rao <bharata@....com>,
"Aneesh Kumar K . V" <aneesh.kumar@...ux.ibm.com>,
Wei Xu <weixugc@...gle.com>,
Dan Williams <dan.j.williams@...el.com>,
Dave Hansen <dave.hansen@...el.com>,
Davidlohr Bueso <dave@...olabs.net>,
Johannes Weiner <hannes@...xchg.org>,
Jonathan Cameron <Jonathan.Cameron@...wei.com>,
Michal Hocko <mhocko@...nel.org>,
Yang Shi <shy828301@...il.com>,
Dave Jiang <dave.jiang@...el.com>,
Rafael J Wysocki <rafael.j.wysocki@...el.com>
Subject: Re: [PATCH -V3 1/4] memory tiering: add abstract distance
calculation algorithms management
Reviewed-by: Alistair Popple <apopple@...dia.com>
Huang Ying <ying.huang@...el.com> writes:
> The abstract distance may be calculated by various drivers, such as
> ACPI HMAT, CXL CDAT, etc. While it may be used by various code which
> hot-add memory node, such as dax/kmem etc. To decouple the algorithm
> users and the providers, the abstract distance calculation algorithms
> management mechanism is implemented in this patch. It provides
> interface for the providers to register the implementation, and
> interface for the users.
>
> Multiple algorithm implementations can cooperate via calculating
> abstract distance for different memory nodes. The preference of
> algorithm implementations can be specified via
> priority (notifier_block.priority).
>
> Signed-off-by: "Huang, Ying" <ying.huang@...el.com>
> Tested-by: Bharata B Rao <bharata@....com>
> Cc: Aneesh Kumar K.V <aneesh.kumar@...ux.ibm.com>
> Cc: Wei Xu <weixugc@...gle.com>
> Cc: Alistair Popple <apopple@...dia.com>
> Cc: Dan Williams <dan.j.williams@...el.com>
> Cc: Dave Hansen <dave.hansen@...el.com>
> Cc: Davidlohr Bueso <dave@...olabs.net>
> Cc: Johannes Weiner <hannes@...xchg.org>
> Cc: Jonathan Cameron <Jonathan.Cameron@...wei.com>
> Cc: Michal Hocko <mhocko@...nel.org>
> Cc: Yang Shi <shy828301@...il.com>
> Cc: Dave Jiang <dave.jiang@...el.com>
> Cc: Rafael J Wysocki <rafael.j.wysocki@...el.com>
> ---
> include/linux/memory-tiers.h | 19 ++++++++++++
> mm/memory-tiers.c | 59 ++++++++++++++++++++++++++++++++++++
> 2 files changed, 78 insertions(+)
>
> diff --git a/include/linux/memory-tiers.h b/include/linux/memory-tiers.h
> index 437441cdf78f..c8382220cced 100644
> --- a/include/linux/memory-tiers.h
> +++ b/include/linux/memory-tiers.h
> @@ -6,6 +6,7 @@
> #include <linux/nodemask.h>
> #include <linux/kref.h>
> #include <linux/mmzone.h>
> +#include <linux/notifier.h>
> /*
> * Each tier cover a abstrace distance chunk size of 128
> */
> @@ -36,6 +37,9 @@ struct memory_dev_type *alloc_memory_type(int adistance);
> void put_memory_type(struct memory_dev_type *memtype);
> void init_node_memory_type(int node, struct memory_dev_type *default_type);
> void clear_node_memory_type(int node, struct memory_dev_type *memtype);
> +int register_mt_adistance_algorithm(struct notifier_block *nb);
> +int unregister_mt_adistance_algorithm(struct notifier_block *nb);
> +int mt_calc_adistance(int node, int *adist);
> #ifdef CONFIG_MIGRATION
> int next_demotion_node(int node);
> void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *targets);
> @@ -97,5 +101,20 @@ static inline bool node_is_toptier(int node)
> {
> return true;
> }
> +
> +static inline int register_mt_adistance_algorithm(struct notifier_block *nb)
> +{
> + return 0;
> +}
> +
> +static inline int unregister_mt_adistance_algorithm(struct notifier_block *nb)
> +{
> + return 0;
> +}
> +
> +static inline int mt_calc_adistance(int node, int *adist)
> +{
> + return NOTIFY_DONE;
> +}
> #endif /* CONFIG_NUMA */
> #endif /* _LINUX_MEMORY_TIERS_H */
> diff --git a/mm/memory-tiers.c b/mm/memory-tiers.c
> index 37a4f59d9585..76c0ad47a5ad 100644
> --- a/mm/memory-tiers.c
> +++ b/mm/memory-tiers.c
> @@ -5,6 +5,7 @@
> #include <linux/kobject.h>
> #include <linux/memory.h>
> #include <linux/memory-tiers.h>
> +#include <linux/notifier.h>
>
> #include "internal.h"
>
> @@ -105,6 +106,8 @@ static int top_tier_adistance;
> static struct demotion_nodes *node_demotion __read_mostly;
> #endif /* CONFIG_MIGRATION */
>
> +static BLOCKING_NOTIFIER_HEAD(mt_adistance_algorithms);
> +
> static inline struct memory_tier *to_memory_tier(struct device *device)
> {
> return container_of(device, struct memory_tier, dev);
> @@ -592,6 +595,62 @@ void clear_node_memory_type(int node, struct memory_dev_type *memtype)
> }
> EXPORT_SYMBOL_GPL(clear_node_memory_type);
>
> +/**
> + * register_mt_adistance_algorithm() - Register memory tiering abstract distance algorithm
> + * @nb: The notifier block which describe the algorithm
> + *
> + * Return: 0 on success, errno on error.
> + *
> + * Every memory tiering abstract distance algorithm provider needs to
> + * register the algorithm with register_mt_adistance_algorithm(). To
> + * calculate the abstract distance for a specified memory node, the
> + * notifier function will be called unless some high priority
> + * algorithm has provided result. The prototype of the notifier
> + * function is as follows,
> + *
> + * int (*algorithm_notifier)(struct notifier_block *nb,
> + * unsigned long nid, void *data);
> + *
> + * Where "nid" specifies the memory node, "data" is the pointer to the
> + * returned abstract distance (that is, "int *adist"). If the
> + * algorithm provides the result, NOTIFY_STOP should be returned.
> + * Otherwise, return_value & %NOTIFY_STOP_MASK == 0 to allow the next
> + * algorithm in the chain to provide the result.
> + */
> +int register_mt_adistance_algorithm(struct notifier_block *nb)
> +{
> + return blocking_notifier_chain_register(&mt_adistance_algorithms, nb);
> +}
> +EXPORT_SYMBOL_GPL(register_mt_adistance_algorithm);
> +
> +/**
> + * unregister_mt_adistance_algorithm() - Unregister memory tiering abstract distance algorithm
> + * @nb: the notifier block which describe the algorithm
> + *
> + * Return: 0 on success, errno on error.
> + */
> +int unregister_mt_adistance_algorithm(struct notifier_block *nb)
> +{
> + return blocking_notifier_chain_unregister(&mt_adistance_algorithms, nb);
> +}
> +EXPORT_SYMBOL_GPL(unregister_mt_adistance_algorithm);
> +
> +/**
> + * mt_calc_adistance() - Calculate abstract distance with registered algorithms
> + * @node: the node to calculate abstract distance for
> + * @adist: the returned abstract distance
> + *
> + * Return: if return_value & %NOTIFY_STOP_MASK != 0, then some
> + * abstract distance algorithm provides the result, and return it via
> + * @adist. Otherwise, no algorithm can provide the result and @adist
> + * will be kept as it is.
> + */
> +int mt_calc_adistance(int node, int *adist)
> +{
> + return blocking_notifier_call_chain(&mt_adistance_algorithms, node, adist);
> +}
> +EXPORT_SYMBOL_GPL(mt_calc_adistance);
> +
> static int __meminit memtier_hotplug_callback(struct notifier_block *self,
> unsigned long action, void *_arg)
> {
Powered by blists - more mailing lists