lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <IA1PR11MB609702742C92B316A78D73459B1F9@IA1PR11MB6097.namprd11.prod.outlook.com>
Date:   Sat, 10 Dec 2022 02:18:18 +0000
From:   "Yu, Fenghua" <fenghua.yu@...el.com>
To:     Shawn Wang <shawnwang@...ux.alibaba.com>,
        "Chatre, Reinette" <reinette.chatre@...el.com>
CC:     "bp@...en8.de" <bp@...en8.de>,
        "dave.hansen@...ux.intel.com" <dave.hansen@...ux.intel.com>,
        "hpa@...or.com" <hpa@...or.com>,
        "james.morse@....com" <james.morse@....com>,
        "jamie@...iainc.com" <jamie@...iainc.com>,
        "linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
        "mingo@...hat.com" <mingo@...hat.com>,
        "tglx@...utronix.de" <tglx@...utronix.de>,
        "x86@...nel.org" <x86@...nel.org>
Subject: RE: [PATCH v4] x86/resctrl: Clear staged_config[] before and after it
 is used

Hi, Shawn,

> As a temporary storage, staged_config[] in rdt_domain should be cleared before
> and after it is used. The stale value in staged_config[] could cause an MSR
> access error.

Why should staged_config[] be cleared both before and after it's used?
If it's cleared only before it's used, does it make more sense?
Usually temp variables are initialized before they are used and their values will
be ignored after usage. Especially clearing stage_config[] in double for
loop is pretty heavy code and an extra clearing stage_config[] after usage
takes unnecessary longer time.

> access error.
> 
> Here is a reproducer on a system with 16 usable CLOSIDs for a 15-way L3 Cache
> (MBA should be disabled if the number of CLOSIDs for MB is less than
> 16.) :
> 	mount -t resctrl resctrl -o cdp /sys/fs/resctrl
> 	mkdir /sys/fs/resctrl/p{1..7}
> 	umount /sys/fs/resctrl/
> 	mount -t resctrl resctrl /sys/fs/resctrl
> 	mkdir /sys/fs/resctrl/p{1..8}
> 
> An error occurs when creating resource group named p8:
>     unchecked MSR access error: WRMSR to 0xca0 (tried to write
> 0x00000000000007ff) at rIP: 0xffffffff82249142 (cat_wrmsr+0x32/0x60)
>     Call Trace:
>      <IRQ>
>      __flush_smp_call_function_queue+0x11d/0x170
>      __sysvec_call_function+0x24/0xd0
>      sysvec_call_function+0x89/0xc0
>      </IRQ>
>      <TASK>
>      asm_sysvec_call_function+0x16/0x20
> 
> When creating a new resource control group, hardware will be configured by
> the following process:
>     rdtgroup_mkdir()
>       rdtgroup_mkdir_ctrl_mon()
>         rdtgroup_init_alloc()
>           resctrl_arch_update_domains()
> 
> resctrl_arch_update_domains() iterates and updates all resctrl_conf_type whose
> have_new_ctrl is true. Since staged_config[] holds the same values as when CDP
> was enabled, it will continue to update the CDP_CODE and CDP_DATA
> configurations. When group p8 is created, get_config_index() called in
> resctrl_arch_update_domains() will return 16 and 17 as the CLOSIDs for
> CDP_CODE and CDP_DATA, which will be translated to an invalid register -
> 0xca0 in this scenario.
> 
> Fix it by clearing staged_config[] before and after it is used.
> 
> Fixes: 75408e43509e ("x86/resctrl: Allow different CODE/DATA configurations
> to be staged")
> Cc: <stable@...r.kernel.org>
> Signed-off-by: Shawn Wang <shawnwang@...ux.alibaba.com>
> Suggested-by: Xin Hao <xhao@...ux.alibaba.com>
> ---
> Changes since v3:
> - Update the commit message suggested by Reiniette Chatre.
> - Rename resctrl_staged_configs_clear() to rdt_staged_configs_clear().
> - Move rdt_staged_configs_clear() to arch/x86/kernel/cpu/resctrl/internal.h.
> 
> Changes since v2:
> - Update the commit message suggested by Reiniette Chatre.
> - Make the clearing work more robust.
> 
> Changes since v1:
> - Move the clearing from schemata_list_destroy() to
> resctrl_arch_update_domains().
> - Update the commit message suggested by Reiniette Chatre.
> - Add stable tag suggested by James Morse.
> ---
>  arch/x86/kernel/cpu/resctrl/ctrlmondata.c |  7 ++-----
>  arch/x86/kernel/cpu/resctrl/internal.h    |  1 +
>  arch/x86/kernel/cpu/resctrl/rdtgroup.c    | 25 +++++++++++++++++++----
>  3 files changed, 24 insertions(+), 9 deletions(-)
> 
> diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
> b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
> index 1dafbdc5ac31..84f23327caed 100644
> --- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
> +++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
> @@ -374,7 +374,6 @@ ssize_t rdtgroup_schemata_write(struct
> kernfs_open_file *of,  {
>  	struct resctrl_schema *s;
>  	struct rdtgroup *rdtgrp;
> -	struct rdt_domain *dom;
>  	struct rdt_resource *r;
>  	char *tok, *resname;
>  	int ret = 0;
> @@ -403,10 +402,7 @@ ssize_t rdtgroup_schemata_write(struct
> kernfs_open_file *of,
>  		goto out;
>  	}
> 
> -	list_for_each_entry(s, &resctrl_schema_all, list) {
> -		list_for_each_entry(dom, &s->res->domains, list)
> -			memset(dom->staged_config, 0, sizeof(dom-
> >staged_config));
> -	}
> +	rdt_staged_configs_clear();
> 
>  	while ((tok = strsep(&buf, "\n")) != NULL) {
>  		resname = strim(strsep(&tok, ":"));
> @@ -451,6 +447,7 @@ ssize_t rdtgroup_schemata_write(struct
> kernfs_open_file *of,
>  	}
> 
>  out:
> +	rdt_staged_configs_clear();
>  	rdtgroup_kn_unlock(of->kn);
>  	cpus_read_unlock();
>  	return ret ?: nbytes;
> diff --git a/arch/x86/kernel/cpu/resctrl/internal.h
> b/arch/x86/kernel/cpu/resctrl/internal.h
> index 5f7128686cfd..0b5c6c76f6f7 100644
> --- a/arch/x86/kernel/cpu/resctrl/internal.h
> +++ b/arch/x86/kernel/cpu/resctrl/internal.h
> @@ -537,5 +537,6 @@ bool has_busy_rmid(struct rdt_resource *r, struct
> rdt_domain *d);  void __check_limbo(struct rdt_domain *d, bool force_free);
> void rdt_domain_reconfigure_cdp(struct rdt_resource *r);  void __init
> thread_throttle_mode_init(void);
> +void rdt_staged_configs_clear(void);
> 
>  #endif /* _ASM_X86_RESCTRL_INTERNAL_H */ diff --git
> a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> index e5a48f05e787..fee8ed86b31c 100644
> --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> @@ -78,6 +78,19 @@ void rdt_last_cmd_printf(const char *fmt, ...)
>  	va_end(ap);
>  }
> 
> +void rdt_staged_configs_clear(void)
> +{
> +	struct rdt_resource *r;
> +	struct rdt_domain *dom;

Please reorder the variable definitions in the reversed Christmas tree order.

> +
> +	lockdep_assert_held(&rdtgroup_mutex);
> +
> +	for_each_alloc_capable_rdt_resource(r) {
> +		list_for_each_entry(dom, &r->domains, list)
> +			memset(dom->staged_config, 0, sizeof(dom-
> >staged_config));
> +	}
> +}
> +
>  /*
>   * Trivial allocator for CLOSIDs. Since h/w only supports a small number,
>   * we can keep a bitmap of free CLOSIDs in a single integer.
> @@ -2841,7 +2854,9 @@ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
> {
>  	struct resctrl_schema *s;
>  	struct rdt_resource *r;
> -	int ret;
> +	int ret = 0;
> +
> +	rdt_staged_configs_clear();
> 
>  	list_for_each_entry(s, &resctrl_schema_all, list) {
>  		r = s->res;
> @@ -2852,20 +2867,22 @@ static int rdtgroup_init_alloc(struct rdtgroup
> *rdtgrp)
>  		} else {
>  			ret = rdtgroup_init_cat(s, rdtgrp->closid);
>  			if (ret < 0)
> -				return ret;
> +				goto out;
>  		}
> 
>  		ret = resctrl_arch_update_domains(r, rdtgrp->closid);
>  		if (ret < 0) {
>  			rdt_last_cmd_puts("Failed to initialize allocations\n");
> -			return ret;
> +			goto out;
>  		}
> 
>  	}
> 
>  	rdtgrp->mode = RDT_MODE_SHAREABLE;
> 
> -	return 0;
> +out:
> +	rdt_staged_configs_clear();
> +	return ret;
>  }
> 
>  static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
> --
> 2.27.0

Thanks.

-Fenghua

Powered by blists - more mailing lists