lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <462eb5cb-56bc-4639-9029-5166cb71b5bf@nvidia.com>
Date: Fri, 6 Jun 2025 17:45:58 -0700
From: Fenghua Yu <fenghuay@...dia.com>
To: Tony Luck <tony.luck@...el.com>,
 Reinette Chatre <reinette.chatre@...el.com>,
 Maciej Wieczor-Retman <maciej.wieczor-retman@...el.com>,
 Peter Newman <peternewman@...gle.com>, James Morse <james.morse@....com>,
 Babu Moger <babu.moger@....com>, Drew Fustini <dfustini@...libre.com>,
 Dave Martin <Dave.Martin@....com>,
 Anil Keshavamurthy <anil.s.keshavamurthy@...el.com>,
 Chen Yu <yu.c.chen@...el.com>
Cc: x86@...nel.org, linux-kernel@...r.kernel.org, patches@...ts.linux.dev
Subject: Re: [PATCH v5 04/29] x86,fs/resctrl: Prepare for more monitor events

Hi, Tony,

On 5/21/25 15:50, Tony Luck wrote:
> There's a rule in computer programming that objects appear zero,
> once, or many times. So code accordingly.
>
> There are two MBM events and resctrl is coded with a lot of
>
>          if (local)
>                  do one thing
>          if (total)
>                  do a different thing
>
> Change the rdt_mon_domain and rdt_hw_mon_domain structures to hold arrays
> of pointers to per event data instead of explicit fields for total and
> local bandwidth.
>
> Simplify by coding for many events using loops on which are enabled.
>
> Move resctrl_is_mbm_event() to <linux/resctrl.h> so it can be used more
> widely. Also provide a for_each_mbm_event() helper macro.
>
> Signed-off-by: Tony Luck <tony.luck@...el.com>
> ---
>   include/linux/resctrl.h                | 15 +++++---
>   include/linux/resctrl_types.h          |  3 ++
>   arch/x86/kernel/cpu/resctrl/internal.h |  6 ++--
>   arch/x86/kernel/cpu/resctrl/core.c     | 38 ++++++++++----------
>   arch/x86/kernel/cpu/resctrl/monitor.c  | 36 +++++++++----------
>   fs/resctrl/monitor.c                   | 13 ++++---
>   fs/resctrl/rdtgroup.c                  | 48 ++++++++++++--------------
>   7 files changed, 82 insertions(+), 77 deletions(-)
>
> diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
> index 843ad7c8e247..40f2d0d48d02 100644
> --- a/include/linux/resctrl.h
> +++ b/include/linux/resctrl.h
> @@ -161,8 +161,7 @@ struct rdt_ctrl_domain {
>    * @hdr:		common header for different domain types
>    * @ci:			cache info for this domain
>    * @rmid_busy_llc:	bitmap of which limbo RMIDs are above threshold
> - * @mbm_total:		saved state for MBM total bandwidth
> - * @mbm_local:		saved state for MBM local bandwidth
> + * @mbm_states:		saved state for each QOS MBM event
>    * @mbm_over:		worker to periodically read MBM h/w counters
>    * @cqm_limbo:		worker to periodically read CQM h/w counters
>    * @mbm_work_cpu:	worker CPU for MBM h/w counters
> @@ -172,8 +171,7 @@ struct rdt_mon_domain {
>   	struct rdt_domain_hdr		hdr;
>   	struct cacheinfo		*ci;
>   	unsigned long			*rmid_busy_llc;
> -	struct mbm_state		*mbm_total;
> -	struct mbm_state		*mbm_local;
> +	struct mbm_state		*mbm_states[QOS_NUM_L3_MBM_EVENTS];
>   	struct delayed_work		mbm_over;
>   	struct delayed_work		cqm_limbo;
>   	int				mbm_work_cpu;
> @@ -376,6 +374,15 @@ bool resctrl_is_mon_event_enabled(enum resctrl_event_id evt);
>   
>   bool resctrl_arch_is_evt_configurable(enum resctrl_event_id evt);
>   
> +static inline bool resctrl_is_mbm_event(enum resctrl_event_id e)
> +{
> +	return (e >= QOS_L3_MBM_TOTAL_EVENT_ID &&
> +		e <= QOS_L3_MBM_LOCAL_EVENT_ID);
> +}
> +
> +#define for_each_mbm_event(evt)	\
> +	for (evt = QOS_L3_MBM_TOTAL_EVENT_ID; evt <= QOS_L3_MBM_LOCAL_EVENT_ID; evt++)
> +
>   /**
>    * resctrl_arch_mon_event_config_write() - Write the config for an event.
>    * @config_info: struct resctrl_mon_config_info describing the resource, domain
> diff --git a/include/linux/resctrl_types.h b/include/linux/resctrl_types.h
> index a25fb9c4070d..b468bfbab9ea 100644
> --- a/include/linux/resctrl_types.h
> +++ b/include/linux/resctrl_types.h
> @@ -47,4 +47,7 @@ enum resctrl_event_id {
>   	QOS_NUM_EVENTS,
>   };
>   
> +#define QOS_NUM_L3_MBM_EVENTS	(QOS_L3_MBM_LOCAL_EVENT_ID - QOS_L3_MBM_TOTAL_EVENT_ID + 1)
> +#define MBM_STATE_IDX(evt)	((evt) - QOS_L3_MBM_TOTAL_EVENT_ID)
> +
>   #endif /* __LINUX_RESCTRL_TYPES_H */
> diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
> index 5e3c41b36437..ea185b4d0d59 100644
> --- a/arch/x86/kernel/cpu/resctrl/internal.h
> +++ b/arch/x86/kernel/cpu/resctrl/internal.h
> @@ -54,15 +54,13 @@ struct rdt_hw_ctrl_domain {
>    * struct rdt_hw_mon_domain - Arch private attributes of a set of CPUs that share
>    *			      a resource for a monitor function
>    * @d_resctrl:	Properties exposed to the resctrl file system
> - * @arch_mbm_total:	arch private state for MBM total bandwidth
> - * @arch_mbm_local:	arch private state for MBM local bandwidth
> + * @arch_mbm_states:	arch private state for each MBM event
>    *
>    * Members of this structure are accessed via helpers that provide abstraction.
>    */
>   struct rdt_hw_mon_domain {
>   	struct rdt_mon_domain		d_resctrl;
> -	struct arch_mbm_state		*arch_mbm_total;
> -	struct arch_mbm_state		*arch_mbm_local;
> +	struct arch_mbm_state		*arch_mbm_states[QOS_NUM_L3_MBM_EVENTS];
>   };
>   
>   static inline struct rdt_hw_ctrl_domain *resctrl_to_arch_ctrl_dom(struct rdt_ctrl_domain *r)
> diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
> index 819bc7a09327..4403a820db12 100644
> --- a/arch/x86/kernel/cpu/resctrl/core.c
> +++ b/arch/x86/kernel/cpu/resctrl/core.c
> @@ -364,8 +364,8 @@ static void ctrl_domain_free(struct rdt_hw_ctrl_domain *hw_dom)
>   
>   static void mon_domain_free(struct rdt_hw_mon_domain *hw_dom)
>   {
> -	kfree(hw_dom->arch_mbm_total);
> -	kfree(hw_dom->arch_mbm_local);
> +	for (int i = 0; i < QOS_NUM_L3_MBM_EVENTS; i++)
> +		kfree(hw_dom->arch_mbm_states[i]);

Is it better to define a helper for_each_mon_event_idx(i)?

#define for_each_mbm_event_idx(i) \

     for (int i = 0; i < QOS_NUM_L3_MBM_EVENTS; i++)

Then the above for loop can be simplified to:

     for_each_mbm_event_idxd(i)

	kfree(hw_dom->arch_mbm_states[i]);

The helper can be used in other places as well (see below).

>   	kfree(hw_dom);
>   }
>   
> @@ -399,25 +399,27 @@ static int domain_setup_ctrlval(struct rdt_resource *r, struct rdt_ctrl_domain *
>    */
>   static int arch_domain_mbm_alloc(u32 num_rmid, struct rdt_hw_mon_domain *hw_dom)
>   {
> -	size_t tsize;
> -
> -	if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID)) {
> -		tsize = sizeof(*hw_dom->arch_mbm_total);
> -		hw_dom->arch_mbm_total = kcalloc(num_rmid, tsize, GFP_KERNEL);
> -		if (!hw_dom->arch_mbm_total)
> -			return -ENOMEM;
> -	}
> -	if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID)) {
> -		tsize = sizeof(*hw_dom->arch_mbm_local);
> -		hw_dom->arch_mbm_local = kcalloc(num_rmid, tsize, GFP_KERNEL);
> -		if (!hw_dom->arch_mbm_local) {
> -			kfree(hw_dom->arch_mbm_total);
> -			hw_dom->arch_mbm_total = NULL;
> -			return -ENOMEM;
> -		}
> +	size_t tsize = sizeof(struct arch_mbm_state);
> +	enum resctrl_event_id evt;
> +	int idx;
> +
> +	for_each_mbm_event(evt) {
> +		if (!resctrl_is_mon_event_enabled(evt))
> +			continue;
> +		idx = MBM_STATE_IDX(evt);
> +		hw_dom->arch_mbm_states[idx] = kcalloc(num_rmid, tsize, GFP_KERNEL);
> +		if (!hw_dom->arch_mbm_states[idx])
> +			goto cleanup;
>   	}
>   
>   	return 0;
> +cleanup:
> +	while (--idx >= 0) {
> +		kfree(hw_dom->arch_mbm_states[idx]);
> +		hw_dom->arch_mbm_states[idx] = NULL;
> +	}
> +
> +	return -ENOMEM;
>   }
>   
>   static int get_domain_id_from_scope(int cpu, enum resctrl_scope scope)
> diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
> index fda579251dba..85526e5540f2 100644
> --- a/arch/x86/kernel/cpu/resctrl/monitor.c
> +++ b/arch/x86/kernel/cpu/resctrl/monitor.c
> @@ -160,18 +160,14 @@ static struct arch_mbm_state *get_arch_mbm_state(struct rdt_hw_mon_domain *hw_do
>   						 u32 rmid,
>   						 enum resctrl_event_id eventid)
>   {
> -	switch (eventid) {
> -	case QOS_L3_OCCUP_EVENT_ID:
> -		return NULL;
> -	case QOS_L3_MBM_TOTAL_EVENT_ID:
> -		return &hw_dom->arch_mbm_total[rmid];
> -	case QOS_L3_MBM_LOCAL_EVENT_ID:
> -		return &hw_dom->arch_mbm_local[rmid];
> -	default:
> -		/* Never expect to get here */
> -		WARN_ON_ONCE(1);
> +	struct arch_mbm_state *state;
> +
> +	if (!resctrl_is_mbm_event(eventid))
>   		return NULL;
> -	}
> +
> +	state = hw_dom->arch_mbm_states[MBM_STATE_IDX(eventid)];
> +
> +	return state ? &state[rmid] : NULL;
>   }
>   
>   void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_mon_domain *d,
> @@ -200,14 +196,16 @@ void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_mon_domain *d,
>   void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_mon_domain *d)
>   {
>   	struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d);
> -
> -	if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID))
> -		memset(hw_dom->arch_mbm_total, 0,
> -		       sizeof(*hw_dom->arch_mbm_total) * r->num_rmid);
> -
> -	if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID))
> -		memset(hw_dom->arch_mbm_local, 0,
> -		       sizeof(*hw_dom->arch_mbm_local) * r->num_rmid);
> +	enum resctrl_event_id evt;
> +	int idx;
> +
> +	for_each_mbm_event(evt) {
> +		idx = MBM_STATE_IDX(evt);
> +		if (!hw_dom->arch_mbm_states[idx])
> +			continue;
> +		memset(hw_dom->arch_mbm_states[idx], 0,
> +		       sizeof(struct arch_mbm_state) * r->num_rmid);
> +	}
>   }
>   
>   static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr, unsigned int width)
> diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c
> index 325e23c1a403..4cd0789998bf 100644
> --- a/fs/resctrl/monitor.c
> +++ b/fs/resctrl/monitor.c
> @@ -346,15 +346,14 @@ static struct mbm_state *get_mbm_state(struct rdt_mon_domain *d, u32 closid,
>   				       u32 rmid, enum resctrl_event_id evtid)
>   {
>   	u32 idx = resctrl_arch_rmid_idx_encode(closid, rmid);
> +	struct mbm_state *states;
>   
> -	switch (evtid) {
> -	case QOS_L3_MBM_TOTAL_EVENT_ID:
> -		return &d->mbm_total[idx];
> -	case QOS_L3_MBM_LOCAL_EVENT_ID:
> -		return &d->mbm_local[idx];
> -	default:
> +	if (!resctrl_is_mbm_event(evtid))
>   		return NULL;
> -	}
> +
> +	states = d->mbm_states[MBM_STATE_IDX(evtid)];
> +
> +	return states ? &states[idx] : NULL;
>   }
>   
>   static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr)
> diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c
> index 80e74940281a..8649b89d7bfd 100644
> --- a/fs/resctrl/rdtgroup.c
> +++ b/fs/resctrl/rdtgroup.c
> @@ -127,12 +127,6 @@ static bool resctrl_is_mbm_enabled(void)
>   		resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID));
>   }
>   
> -static bool resctrl_is_mbm_event(int e)
> -{
> -	return (e >= QOS_L3_MBM_TOTAL_EVENT_ID &&
> -		e <= QOS_L3_MBM_LOCAL_EVENT_ID);
> -}
> -
>   /*
>    * Trivial allocator for CLOSIDs. Use BITMAP APIs to manipulate a bitmap
>    * of free CLOSIDs.
> @@ -4020,8 +4014,10 @@ static void rdtgroup_setup_default(void)
>   static void domain_destroy_mon_state(struct rdt_mon_domain *d)
>   {
>   	bitmap_free(d->rmid_busy_llc);
> -	kfree(d->mbm_total);
> -	kfree(d->mbm_local);
> +	for (int i = 0; i < QOS_NUM_L3_MBM_EVENTS; i++) {

The for loop can be simplified by the new helper:

                 for_each_mbm_event_idx(i) {

> +		kfree(d->mbm_states[i]);
> +		d->mbm_states[i] = NULL;
> +	}
>   }
>   
>   void resctrl_offline_ctrl_domain(struct rdt_resource *r, struct rdt_ctrl_domain *d)
> @@ -4081,32 +4077,34 @@ void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d
>   static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_mon_domain *d)
>   {
>   	u32 idx_limit = resctrl_arch_system_num_rmid_idx();
> -	size_t tsize;
> +	size_t tsize = sizeof(struct mbm_state);
> +	enum resctrl_event_id evt;
> +	int idx;
>   
>   	if (resctrl_is_mon_event_enabled(QOS_L3_OCCUP_EVENT_ID)) {
>   		d->rmid_busy_llc = bitmap_zalloc(idx_limit, GFP_KERNEL);
>   		if (!d->rmid_busy_llc)
>   			return -ENOMEM;
>   	}
> -	if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID)) {
> -		tsize = sizeof(*d->mbm_total);
> -		d->mbm_total = kcalloc(idx_limit, tsize, GFP_KERNEL);
> -		if (!d->mbm_total) {
> -			bitmap_free(d->rmid_busy_llc);
> -			return -ENOMEM;
> -		}
> -	}
> -	if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID)) {
> -		tsize = sizeof(*d->mbm_local);
> -		d->mbm_local = kcalloc(idx_limit, tsize, GFP_KERNEL);
> -		if (!d->mbm_local) {
> -			bitmap_free(d->rmid_busy_llc);
> -			kfree(d->mbm_total);
> -			return -ENOMEM;
> -		}
> +
> +	for_each_mbm_event(evt) {
> +		if (!resctrl_is_mon_event_enabled(evt))
> +			continue;
> +		idx = MBM_STATE_IDX(evt);
> +		d->mbm_states[idx] = kcalloc(idx_limit, tsize, GFP_KERNEL);
> +		if (!d->mbm_states[idx])
> +			goto cleanup;
>   	}
>   
>   	return 0;
> +cleanup:
> +	bitmap_free(d->rmid_busy_llc);
> +	while (--idx >= 0) {
> +		kfree(d->mbm_states[idx]);
> +		d->mbm_states[idx] = NULL;
> +	}
> +
> +	return -ENOMEM;
>   }
>   
>   int resctrl_online_ctrl_domain(struct rdt_resource *r, struct rdt_ctrl_domain *d)

for_each_mbm_event() can simplified mbm_update() as well?

-        if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID))
-               mbm_update_one_event(r, d, closid, rmid, 
QOS_L3_MBM_TOTAL_EVENT_ID);
-
-        if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID))
-                mbm_update_one_event(r, d, closid, rmid, 
QOS_L3_MBM_LOCAL_EVENT_ID);
+        for_each_mbm_event_idx(evt) {
+                if (resctrl_is_mon_event_enabled(evt))
+                        mbm_update_one_event(r, d, closid, rmid, evt);

Thanks.

-Fenghua


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ