linux-kernel - [PATCH v3 2/6] x86/MCE: Handle MCA controls in a per

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20190430203206.104163-3-Yazen.Ghannam@amd.com>
Date:   Tue, 30 Apr 2019 20:32:18 +0000
From:   "Ghannam, Yazen" <Yazen.Ghannam@....com>
To:     "linux-edac@...r.kernel.org" <linux-edac@...r.kernel.org>
CC:     "Ghannam, Yazen" <Yazen.Ghannam@....com>,
        "linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
        "bp@...e.de" <bp@...e.de>,
        "tony.luck@...el.com" <tony.luck@...el.com>,
        "x86@...nel.org" <x86@...nel.org>
Subject: [PATCH v3 2/6] x86/MCE: Handle MCA controls in a per_cpu way

From: Yazen Ghannam <yazen.ghannam@....com>

Current AMD systems have unique MCA banks per logical CPU even though
the type of the banks may all align to the same bank number. Each CPU
will have control of a set of MCA banks in the hardware and these are
not shared with other CPUs.

For example, bank 0 may be the Load-Store Unit on every logical CPU, but
each bank 0 is a unique structure in the hardware. In other words, there
isn't a *single* Load-Store Unit at MCA bank 0 that all logical CPUs
share.

This idea extends even to non-core MCA banks. For example, CPU0 and CPU4
may see a Unified Memory Controller at bank 15, but each CPU is actually
seeing a unique hardware structure that is not shared with other CPUs.

Because the MCA banks are all unique hardware structures, it would be
good to control them in a more granular way. For example, if there is a
known issue with the Floating Point Unit on CPU5 and a user wishes to
disable an error type on the Floating Point Unit, then it would be good
to do this only for CPU5 rather than all CPUs.

Also, future AMD systems may have heterogeneous MCA banks. Meaning the
bank numbers may not necessarily represent the same types between CPUs.
For example, bank 20 visible to CPU0 may be a Unified Memory Controller
and bank 20 visible to CPU4 may be a Coherent Slave. So granular control
will be even more necessary should the user wish to control specific MCA
banks.

Split the device attributes from struct mce_bank leaving only the MCA
bank control fields.

Make struct mce_banks[] per_cpu in order to have more granular control
over individual MCA banks in the hardware.

Allocate the device attributes statically based on the maximum number of
MCA banks supported. The sysfs interface will use as many as needed per
CPU. Currently, this is set to mca_cfg.banks, but will be changed to a
per_cpu bank count in a future patch.

Allocate the MCA control bits dynamically. Use the maximum number of MCA
banks supported for now. This will be changed to a per_cpu bank count in
a future patch.

Redo the sysfs store/show functions to handle the per_cpu mce_banks[].

Signed-off-by: Yazen Ghannam <yazen.ghannam@....com>
---
Link:
https://lkml.kernel.org/r/20190411201743.43195-3-Yazen.Ghannam@amd.com

v2->v3:
* Keep old member alignment in struct mce_bank.
* Change "cpu" to "CPU" in modified comment.
* Use a local array pointer when doing multiple per_cpu accesses.

v1->v2:
* Change "struct mce_bank*" to "struct mce_bank *" in definition.

 arch/x86/kernel/cpu/mce/core.c | 59 ++++++++++++++++++++++++++--------
 1 file changed, 45 insertions(+), 14 deletions(-)

diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index ba5767dd5538..66347bdc8b08 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -64,16 +64,21 @@ static DEFINE_MUTEX(mce_sysfs_mutex);
 
 DEFINE_PER_CPU(unsigned, mce_exception_count);
 
-#define ATTR_LEN               16
-/* One object for each MCE bank, shared by all CPUs */
 struct mce_bank {
 	u64			ctl;			/* subevents to enable */
 	bool			init;			/* initialise bank? */
+};
+static DEFINE_PER_CPU_READ_MOSTLY(struct mce_bank *, mce_banks_percpu);
+
+#define ATTR_LEN               16
+/* One object for each MCE bank, shared by all CPUs */
+struct mce_bank_dev {
 	struct device_attribute	attr;			/* device attribute */
 	char			attrname[ATTR_LEN];	/* attribute name */
+	u8			bank;			/* bank number */
 };
+static struct mce_bank_dev mce_bank_devs[MAX_NR_BANKS];
 
-static struct mce_bank *mce_banks __read_mostly;
 struct mce_vendor_flags mce_flags __read_mostly;
 
 struct mca_config mca_cfg __read_mostly = {
@@ -683,6 +688,7 @@ DEFINE_PER_CPU(unsigned, mce_poll_count);
  */
 bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 {
+	struct mce_bank *mce_banks = this_cpu_read(mce_banks_percpu);
 	bool error_seen = false;
 	struct mce m;
 	int i;
@@ -1130,6 +1136,7 @@ static void __mc_scan_banks(struct mce *m, struct mce *final,
 			    unsigned long *toclear, unsigned long *valid_banks,
 			    int no_way_out, int *worst)
 {
+	struct mce_bank *mce_banks = this_cpu_read(mce_banks_percpu);
 	struct mca_config *cfg = &mca_cfg;
 	int severity, i;
 
@@ -1473,6 +1480,7 @@ EXPORT_SYMBOL_GPL(mce_notify_irq);
 
 static int __mcheck_cpu_mce_banks_init(void)
 {
+	struct mce_bank *mce_banks;
 	int i;
 
 	mce_banks = kcalloc(MAX_NR_BANKS, sizeof(struct mce_bank), GFP_KERNEL);
@@ -1485,6 +1493,8 @@ static int __mcheck_cpu_mce_banks_init(void)
 		b->ctl = -1ULL;
 		b->init = 1;
 	}
+
+	per_cpu(mce_banks_percpu, smp_processor_id()) = mce_banks;
 	return 0;
 }
 
@@ -1504,7 +1514,7 @@ static int __mcheck_cpu_cap_init(void)
 
 	mca_cfg.banks = max(mca_cfg.banks, b);
 
-	if (!mce_banks) {
+	if (!this_cpu_read(mce_banks_percpu)) {
 		int err = __mcheck_cpu_mce_banks_init();
 		if (err)
 			return err;
@@ -1544,6 +1554,7 @@ static void __mcheck_cpu_init_generic(void)
 
 static void __mcheck_cpu_init_clear_banks(void)
 {
+	struct mce_bank *mce_banks = this_cpu_read(mce_banks_percpu);
 	int i;
 
 	for (i = 0; i < mca_cfg.banks; i++) {
@@ -1587,6 +1598,7 @@ static void quirk_sandybridge_ifu(int bank, struct mce *m, struct pt_regs *regs)
 /* Add per CPU specific workarounds here */
 static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
 {
+	struct mce_bank *mce_banks = this_cpu_read(mce_banks_percpu);
 	struct mca_config *cfg = &mca_cfg;
 
 	if (c->x86_vendor == X86_VENDOR_UNKNOWN) {
@@ -1957,6 +1969,7 @@ int __init mcheck_init(void)
  */
 static void mce_disable_error_reporting(void)
 {
+	struct mce_bank *mce_banks = this_cpu_read(mce_banks_percpu);
 	int i;
 
 	for (i = 0; i < mca_cfg.banks; i++) {
@@ -2059,26 +2072,41 @@ static struct bus_type mce_subsys = {
 
 DEFINE_PER_CPU(struct device *, mce_device);
 
-static inline struct mce_bank *attr_to_bank(struct device_attribute *attr)
+static inline struct mce_bank_dev *attr_to_bank(struct device_attribute *attr)
 {
-	return container_of(attr, struct mce_bank, attr);
+	return container_of(attr, struct mce_bank_dev, attr);
 }
 
 static ssize_t show_bank(struct device *s, struct device_attribute *attr,
 			 char *buf)
 {
-	return sprintf(buf, "%llx\n", attr_to_bank(attr)->ctl);
+	u8 bank = attr_to_bank(attr)->bank;
+	struct mce_bank *b;
+
+	if (bank >= mca_cfg.banks)
+		return -EINVAL;
+
+	b = &per_cpu(mce_banks_percpu, s->id)[bank];
+
+	return sprintf(buf, "%llx\n", b->ctl);
 }
 
 static ssize_t set_bank(struct device *s, struct device_attribute *attr,
 			const char *buf, size_t size)
 {
+	u8 bank = attr_to_bank(attr)->bank;
+	struct mce_bank *b;
 	u64 new;
 
 	if (kstrtou64(buf, 0, &new) < 0)
 		return -EINVAL;
 
-	attr_to_bank(attr)->ctl = new;
+	if (bank >= mca_cfg.banks)
+		return -EINVAL;
+
+	b = &per_cpu(mce_banks_percpu, s->id)[bank];
+
+	b->ctl = new;
 	mce_restart();
 
 	return size;
@@ -2193,7 +2221,7 @@ static void mce_device_release(struct device *dev)
 	kfree(dev);
 }
 
-/* Per cpu device init. All of the cpus still share the same ctrl bank: */
+/* Per CPU device init. All of the CPUs still share the same bank device: */
 static int mce_device_create(unsigned int cpu)
 {
 	struct device *dev;
@@ -2226,7 +2254,7 @@ static int mce_device_create(unsigned int cpu)
 			goto error;
 	}
 	for (j = 0; j < mca_cfg.banks; j++) {
-		err = device_create_file(dev, &mce_banks[j].attr);
+		err = device_create_file(dev, &mce_bank_devs[j].attr);
 		if (err)
 			goto error2;
 	}
@@ -2236,7 +2264,7 @@ static int mce_device_create(unsigned int cpu)
 	return 0;
 error2:
 	while (--j >= 0)
-		device_remove_file(dev, &mce_banks[j].attr);
+		device_remove_file(dev, &mce_bank_devs[j].attr);
 error:
 	while (--i >= 0)
 		device_remove_file(dev, mce_device_attrs[i]);
@@ -2258,7 +2286,7 @@ static void mce_device_remove(unsigned int cpu)
 		device_remove_file(dev, mce_device_attrs[i]);
 
 	for (i = 0; i < mca_cfg.banks; i++)
-		device_remove_file(dev, &mce_banks[i].attr);
+		device_remove_file(dev, &mce_bank_devs[i].attr);
 
 	device_unregister(dev);
 	cpumask_clear_cpu(cpu, mce_device_initialized);
@@ -2279,6 +2307,7 @@ static void mce_disable_cpu(void)
 
 static void mce_reenable_cpu(void)
 {
+	struct mce_bank *mce_banks = this_cpu_read(mce_banks_percpu);
 	int i;
 
 	if (!mce_available(raw_cpu_ptr(&cpu_info)))
@@ -2336,10 +2365,12 @@ static __init void mce_init_banks(void)
 {
 	int i;
 
-	for (i = 0; i < mca_cfg.banks; i++) {
-		struct mce_bank *b = &mce_banks[i];
+	for (i = 0; i < MAX_NR_BANKS; i++) {
+		struct mce_bank_dev *b = &mce_bank_devs[i];
 		struct device_attribute *a = &b->attr;
 
+		b->bank = i;
+
 		sysfs_attr_init(&a->attr);
 		a->attr.name	= b->attrname;
 		snprintf(b->attrname, ATTR_LEN, "bank%d", i);
-- 
2.17.1