[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-Id: <20080701164844.EE7D11B4314@basil.firstfloor.org>
Date: Tue, 1 Jul 2008 18:48:44 +0200 (CEST)
From: Andi Kleen <andi@...stfloor.org>
To: venkatesh.pallipadi@...el.com, greg@...ah.com,
linux-kernel@...r.kernel.org
Subject: [PATCH] [4/4] Implement dynamic machine check banks support
[Note: this supercedes an earlier patch by Venki Pallipadi
8edc5cc5ec880c96de8e6686fb0d7a5231e91c05. Venki's patch should be reverted
first before applying this. Also it requires an earlier sysfs infrastructure
patch.]
This patch replaces the hardcoded max number of machine check banks with
dynamic allocation depending on what the CPU reports. The sysfs
data structures and the banks array are dynamically allocated.
There is still a hard bank limit (128) because the mcelog protocol uses
banks >= 128 as pseudo banks to escape other events. But we expect
that 128 banks is beyond any reasonable CPU for now.
Cc: Venki Pallipadi <venkatesh.pallipadi@...el.com>
Signed-off-by: Andi Kleen <ak@...ux.intel.com>
---
arch/x86/kernel/cpu/mcheck/mce_64.c | 128 ++++++++++++++++++++++++++++++------
1 file changed, 110 insertions(+), 18 deletions(-)
Index: linux/arch/x86/kernel/cpu/mcheck/mce_64.c
===================================================================
--- linux.orig/arch/x86/kernel/cpu/mcheck/mce_64.c
+++ linux/arch/x86/kernel/cpu/mcheck/mce_64.c
@@ -23,6 +23,8 @@
#include <linux/ctype.h>
#include <linux/kmod.h>
#include <linux/kdebug.h>
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/mce.h>
@@ -32,7 +34,12 @@
#include "ancient.h"
#define MISC_MCELOG_MINOR 227
-#define NR_BANKS 6
+
+/*
+ * To support more than 128 would need to escape the predefined
+ * Linux defined extended banks first.
+ */
+#define MAX_NR_BANKS (MCE_EXTENDED_BANK - 1)
atomic_t mce_entry;
@@ -47,7 +54,7 @@ int mce_disabled __cpuinitdata;
*/
static int tolerant = 1;
static int banks;
-static unsigned long bank[NR_BANKS] = { [0 ... NR_BANKS-1] = ~0UL };
+static unsigned long *bank;
static unsigned long notify_user;
static int rip_msr;
static int mce_bootlog = -1;
@@ -426,20 +433,39 @@ static int dont_init_bank0;
/*
* Initialize Machine Checks for a CPU.
*/
-static void mce_init(void *dummy)
+static void mce_cap_init(void)
{
u64 cap;
- int i;
rdmsrl(MSR_IA32_MCG_CAP, cap);
- banks = cap & 0xff;
- if (banks > NR_BANKS) {
- printk(KERN_INFO "MCE: warning: using only %d banks\n", banks);
- banks = NR_BANKS;
+ /* Handle the unlikely case of one CPU having less banks than others */
+ if (banks == 0 || banks > (cap & 0xff))
+ banks = cap & 0xff;
+ if (banks > MAX_NR_BANKS) {
+ printk(KERN_WARNING
+ "MCE: Using only %d machine check banks out of %u\n",
+ banks, MAX_NR_BANKS);
+ banks = MAX_NR_BANKS;
+ }
+ if (!bank) {
+ bank = kmalloc(banks * sizeof(u64), GFP_KERNEL);
+ if (!bank)
+ return;
+ memset(bank, 0xff, banks * sizeof(u64));
}
+
/* Use accurate RIP reporting if available. */
if ((cap & (1<<9)) && ((cap >> 16) & 0xff) >= 9)
rip_msr = MSR_IA32_MCG_EIP;
+}
+
+/*
+ * Initialize Machine Checks for a CPU.
+ */
+static void mce_init(void *dummy)
+{
+ int i;
+ u64 cap;
/* Log the machine checks left over from the previous reset.
This also clears all registers */
@@ -447,6 +473,7 @@ static void mce_init(void *dummy)
set_in_cr4(X86_CR4_MCE);
+ rdmsrl(MSR_IA32_MCG_CAP, cap);
if (cap & MCG_CTL_P)
wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
@@ -530,6 +557,7 @@ void __cpuinit mcheck_init(struct cpuinf
mce_ancient_init(c);
mce_cpu_quirks(c);
+ mce_cap_init();
if (mce_disabled ||
cpu_test_and_set(smp_processor_id(), mce_cpus) ||
@@ -780,13 +808,26 @@ DEFINE_PER_CPU(struct sys_device, device
} \
static SYSDEV_ATTR(name, 0644, show_ ## name, set_ ## name);
-/* TBD should generate these dynamically based on number of available banks */
-ACCESSOR(bank0ctl,bank[0],mce_restart())
-ACCESSOR(bank1ctl,bank[1],mce_restart())
-ACCESSOR(bank2ctl,bank[2],mce_restart())
-ACCESSOR(bank3ctl,bank[3],mce_restart())
-ACCESSOR(bank4ctl,bank[4],mce_restart())
-ACCESSOR(bank5ctl,bank[5],mce_restart())
+static struct sysdev_attribute *bank_attrs;
+
+static ssize_t show_bank(struct sys_device *s, struct sysdev_attribute *attr,
+ char *buf)
+{
+ u64 b = bank[attr - bank_attrs];
+ return sprintf(buf, "%Lx\n", b);
+}
+
+static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr,
+ const char *buf, size_t siz)
+{
+ char *end;
+ u64 new = simple_strtoull(buf, &end, 0);
+ if (end == buf)
+ return -EINVAL;
+ bank[attr - bank_attrs] = new;
+ mce_restart();
+ return end-buf;
+}
static ssize_t show_trigger(struct sys_device *s, struct sysdev_attribute *attr,
char *buf)
@@ -813,8 +854,6 @@ static SYSDEV_ATTR(trigger, 0644, show_t
static SYSDEV_INT_ATTR(tolerant, 0644, tolerant);
ACCESSOR(check_interval,check_interval,mce_restart())
static struct sysdev_attribute *mce_attributes[] = {
- &attr_bank0ctl, &attr_bank1ctl, &attr_bank2ctl,
- &attr_bank3ctl, &attr_bank4ctl, &attr_bank5ctl,
&attr_tolerant.attr, &attr_check_interval, &attr_trigger,
NULL
};
@@ -844,11 +883,22 @@ static __cpuinit int mce_create_device(u
if (err)
goto error;
}
+ for (i = 0; i < banks; i++) {
+ err = sysdev_create_file(&per_cpu(device_mce, cpu),
+ &bank_attrs[i]);
+ if (err)
+ goto error2;
+ }
cpu_set(cpu, mce_device_initialized);
return 0;
+error2:
+ while (--i >= 0) {
+ sysdev_remove_file(&per_cpu(device_mce, cpu),
+ &bank_attrs[i]);
+ }
error:
- while (i--) {
+ while (--i >= 0) {
sysdev_remove_file(&per_cpu(device_mce,cpu),
mce_attributes[i]);
}
@@ -867,6 +917,9 @@ static void mce_remove_device(unsigned i
for (i = 0; mce_attributes[i]; i++)
sysdev_remove_file(&per_cpu(device_mce,cpu),
mce_attributes[i]);
+ for (i = 0; i < banks; i++)
+ sysdev_remove_file(&per_cpu(device_mce, cpu),
+ &bank_attrs[i]);
sysdev_unregister(&per_cpu(device_mce,cpu));
cpu_clear(cpu, mce_device_initialized);
}
@@ -894,6 +947,34 @@ static struct notifier_block mce_cpu_not
.notifier_call = mce_cpu_callback,
};
+static __init int mce_init_banks(void)
+{
+ int i;
+
+ bank_attrs = kzalloc(sizeof(struct sysdev_attribute) * banks,
+ GFP_KERNEL);
+ if (!bank_attrs)
+ return -ENOMEM;
+
+ for (i = 0; i < banks; i++) {
+ struct sysdev_attribute *a = &bank_attrs[i];
+ a->attr.name = kasprintf(GFP_KERNEL, "bank%d", i);
+ if (!a->attr.name)
+ goto nomem;
+ a->attr.mode = 0644;
+ a->show = show_bank;
+ a->store = set_bank;
+ }
+ return 0;
+
+nomem:
+ while (--i >= 0)
+ kfree(bank_attrs[i].attr.name);
+ kfree(bank_attrs);
+ bank_attrs = NULL;
+ return -ENOMEM;
+}
+
static __init int mce_init_device(void)
{
int err;
@@ -901,6 +982,11 @@ static __init int mce_init_device(void)
if (!mce_available(&boot_cpu_data))
return -EIO;
+
+ err = mce_init_banks();
+ if (err)
+ return err;
+
err = sysdev_class_register(&mce_sysclass);
if (err)
return err;
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists