lists.openwall.net | lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC | |
Open Source and information security mailing list archives
| ||
|
Date: Thu, 11 Apr 2019 20:18:03 +0000 From: "Ghannam, Yazen" <Yazen.Ghannam@....com> To: "linux-edac@...r.kernel.org" <linux-edac@...r.kernel.org> CC: "Ghannam, Yazen" <Yazen.Ghannam@....com>, "linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>, "bp@...e.de" <bp@...e.de>, "tony.luck@...el.com" <tony.luck@...el.com>, "x86@...nel.org" <x86@...nel.org> Subject: [PATCH v2 4/6] x86/MCE: Make number of MCA banks per_cpu From: Yazen Ghannam <yazen.ghannam@....com> The number of MCA banks is provided per logical CPU. Historically, this number has been the same across all CPUs, but this is not an architectural guarantee. Future AMD systems may have MCA bank counts that vary between logical CPUs in a system. This issue was partially addressed in commit 006c077041dc ("x86/mce: Handle varying MCA bank counts") by allocating structures using the maximum number of MCA banks and by saving the maximum MCA bank count in a system as the global count. This means that some extra structures are allocated. Also, this means that CPUs will spend more time in the #MC and other handlers checking extra MCA banks. Define the number of MCA banks as a per_cpu variable. Replace all uses of mca_cfg.banks with this. Also, use the per_cpu bank count when allocating per_cpu structures. Print the number of banks per CPU as a debug message for those who may be interested. Signed-off-by: Yazen Ghannam <yazen.ghannam@....com> --- Link: https://lkml.kernel.org/r/20190408141205.12376-5-Yazen.Ghannam@amd.com v1->v2: * Drop export of new variable and leave injector code as-is. * Add "mce_" prefix to new "num_banks" variable. 
arch/x86/kernel/cpu/mce/amd.c | 16 +++++----- arch/x86/kernel/cpu/mce/core.c | 48 +++++++++++++++++------------- arch/x86/kernel/cpu/mce/internal.h | 2 +- 3 files changed, 36 insertions(+), 30 deletions(-) diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index f0644b59848d..2aed94f3a23e 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -493,7 +493,7 @@ static u32 get_block_address(u32 current_addr, u32 low, u32 high, { u32 addr = 0, offset = 0; - if ((bank >= mca_cfg.banks) || (block >= NR_BLOCKS)) + if ((bank >= per_cpu(mce_num_banks, cpu)) || (block >= NR_BLOCKS)) return addr; if (mce_flags.smca) @@ -605,7 +605,7 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) disable_err_thresholding(c); - for (bank = 0; bank < mca_cfg.banks; ++bank) { + for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) { if (mce_flags.smca) smca_configure(bank, cpu); @@ -948,7 +948,7 @@ static void amd_deferred_error_interrupt(void) { unsigned int bank; - for (bank = 0; bank < mca_cfg.banks; ++bank) + for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) log_error_deferred(bank); } @@ -989,7 +989,7 @@ static void amd_threshold_interrupt(void) struct threshold_block *first_block = NULL, *block = NULL, *tmp = NULL; unsigned int bank, cpu = smp_processor_id(); - for (bank = 0; bank < mca_cfg.banks; ++bank) { + for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) { if (!(per_cpu(bank_map, cpu) & (1 << bank))) continue; @@ -1176,7 +1176,7 @@ static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank, u32 low, high; int err; - if ((bank >= mca_cfg.banks) || (block >= NR_BLOCKS)) + if ((bank >= per_cpu(mce_num_banks, cpu)) || (block >= NR_BLOCKS)) return 0; if (rdmsr_safe_on_cpu(cpu, address, &low, &high)) @@ -1410,7 +1410,7 @@ int mce_threshold_remove_device(unsigned int cpu) { unsigned int bank; - for (bank = 0; bank < mca_cfg.banks; ++bank) { + for (bank = 0; bank < per_cpu(mce_num_banks, cpu); 
++bank) { if (!(per_cpu(bank_map, cpu) & (1 << bank))) continue; threshold_remove_bank(cpu, bank); @@ -1431,14 +1431,14 @@ int mce_threshold_create_device(unsigned int cpu) if (bp) return 0; - bp = kcalloc(mca_cfg.banks, sizeof(struct threshold_bank *), + bp = kcalloc(per_cpu(mce_num_banks, cpu), sizeof(struct threshold_bank *), GFP_KERNEL); if (!bp) return -ENOMEM; per_cpu(threshold_banks, cpu) = bp; - for (bank = 0; bank < mca_cfg.banks; ++bank) { + for (bank = 0; bank < per_cpu(mce_num_banks, cpu); ++bank) { if (!(per_cpu(bank_map, cpu) & (1 << bank))) continue; err = threshold_create_bank(cpu, bank); diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index aa41f41e5931..0fe29140ecab 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -64,6 +64,8 @@ static DEFINE_MUTEX(mce_sysfs_mutex); DEFINE_PER_CPU(unsigned, mce_exception_count); +DEFINE_PER_CPU_READ_MOSTLY(u8, mce_num_banks); + struct mce_bank { u64 ctl; /* subevents to enable */ bool init; /* initialise bank? 
*/ @@ -699,7 +701,7 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b) if (flags & MCP_TIMESTAMP) m.tsc = rdtsc(); - for (i = 0; i < mca_cfg.banks; i++) { + for (i = 0; i < this_cpu_read(mce_num_banks); i++) { if (!this_cpu_read(mce_banks)[i].ctl || !test_bit(i, *b)) continue; @@ -801,7 +803,7 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp, char *tmp; int i; - for (i = 0; i < mca_cfg.banks; i++) { + for (i = 0; i < this_cpu_read(mce_num_banks); i++) { m->status = mce_rdmsrl(msr_ops.status(i)); if (!(m->status & MCI_STATUS_VAL)) continue; @@ -1081,7 +1083,7 @@ static void mce_clear_state(unsigned long *toclear) { int i; - for (i = 0; i < mca_cfg.banks; i++) { + for (i = 0; i < this_cpu_read(mce_num_banks); i++) { if (test_bit(i, toclear)) mce_wrmsrl(msr_ops.status(i), 0); } @@ -1138,7 +1140,7 @@ static void __mc_scan_banks(struct mce *m, struct mce *final, struct mca_config *cfg = &mca_cfg; int severity, i; - for (i = 0; i < cfg->banks; i++) { + for (i = 0; i < this_cpu_read(mce_num_banks); i++) { __clear_bit(i, toclear); if (!test_bit(i, valid_banks)) continue; @@ -1478,15 +1480,16 @@ EXPORT_SYMBOL_GPL(mce_notify_irq); static int __mcheck_cpu_mce_banks_init(void) { + u8 n_banks = this_cpu_read(mce_num_banks); int i; per_cpu(mce_banks, smp_processor_id()) = - kcalloc(MAX_NR_BANKS, sizeof(struct mce_bank), GFP_KERNEL); + kcalloc(n_banks, sizeof(struct mce_bank), GFP_KERNEL); if (!this_cpu_read(mce_banks)) return -ENOMEM; - for (i = 0; i < MAX_NR_BANKS; i++) { + for (i = 0; i < n_banks; i++) { struct mce_bank *b = &this_cpu_read(mce_banks)[i]; b->ctl = -1ULL; @@ -1507,10 +1510,15 @@ static int __mcheck_cpu_cap_init(void) rdmsrl(MSR_IA32_MCG_CAP, cap); b = cap & MCG_BANKCNT_MASK; - if (WARN_ON_ONCE(b > MAX_NR_BANKS)) + pr_debug("CPU%d supports %d MCE banks\n", smp_processor_id(), b); + + if (b > MAX_NR_BANKS) { + pr_warn("CPU%d: Using only %u machine check banks out of %u\n", + smp_processor_id(), MAX_NR_BANKS, b); b = 
MAX_NR_BANKS; + } - mca_cfg.banks = max(mca_cfg.banks, b); + this_cpu_write(mce_num_banks, b); if (!this_cpu_read(mce_banks)) { int err = __mcheck_cpu_mce_banks_init(); @@ -1554,7 +1562,7 @@ static void __mcheck_cpu_init_clear_banks(void) { int i; - for (i = 0; i < mca_cfg.banks; i++) { + for (i = 0; i < this_cpu_read(mce_num_banks); i++) { struct mce_bank *b = &this_cpu_read(mce_banks)[i]; if (!b->init) @@ -1604,7 +1612,7 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) /* This should be disabled by the BIOS, but isn't always */ if (c->x86_vendor == X86_VENDOR_AMD) { - if (c->x86 == 15 && cfg->banks > 4) { + if (c->x86 == 15 && this_cpu_read(mce_num_banks) > 4) { /* * disable GART TBL walk error reporting, which * trips off incorrectly with the IOMMU & 3ware @@ -1623,7 +1631,7 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) * Various K7s with broken bank 0 around. Always disable * by default. */ - if (c->x86 == 6 && cfg->banks > 0) + if (c->x86 == 6 && this_cpu_read(mce_num_banks) > 0) this_cpu_read(mce_banks)[0].ctl = 0; /* @@ -1645,7 +1653,7 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) * valid event later, merely don't write CTL0. 
*/ - if (c->x86 == 6 && c->x86_model < 0x1A && cfg->banks > 0) + if (c->x86 == 6 && c->x86_model < 0x1A && this_cpu_read(mce_num_banks) > 0) this_cpu_read(mce_banks)[0].init = 0; /* @@ -1871,7 +1879,7 @@ static void __mce_disable_bank(void *arg) void mce_disable_bank(int bank) { - if (bank >= mca_cfg.banks) { + if (bank >= this_cpu_read(mce_num_banks)) { pr_warn(FW_BUG "Ignoring request to disable invalid MCA bank %d.\n", bank); @@ -1959,7 +1967,7 @@ static void mce_disable_error_reporting(void) { int i; - for (i = 0; i < mca_cfg.banks; i++) { + for (i = 0; i < this_cpu_read(mce_num_banks); i++) { struct mce_bank *b = &this_cpu_read(mce_banks)[i]; if (b->init) @@ -2070,7 +2078,7 @@ static ssize_t show_bank(struct device *s, struct device_attribute *attr, struct mce_bank *b; u8 bank = attr_to_bank(attr)->bank; - if (bank >= mca_cfg.banks) + if (bank >= per_cpu(mce_num_banks, s->id)) return -EINVAL; b = &per_cpu(mce_banks, s->id)[bank]; @@ -2088,7 +2096,7 @@ static ssize_t set_bank(struct device *s, struct device_attribute *attr, if (kstrtou64(buf, 0, &new) < 0) return -EINVAL; - if (bank >= mca_cfg.banks) + if (bank >= per_cpu(mce_num_banks, s->id)) return -EINVAL; b = &per_cpu(mce_banks, s->id)[bank]; @@ -2240,7 +2248,7 @@ static int mce_device_create(unsigned int cpu) if (err) goto error; } - for (j = 0; j < mca_cfg.banks; j++) { + for (j = 0; j < per_cpu(mce_num_banks, cpu); j++) { err = device_create_file(dev, &mce_bank_devs[j].attr); if (err) goto error2; @@ -2272,7 +2280,7 @@ static void mce_device_remove(unsigned int cpu) for (i = 0; mce_device_attrs[i]; i++) device_remove_file(dev, mce_device_attrs[i]); - for (i = 0; i < mca_cfg.banks; i++) + for (i = 0; i < per_cpu(mce_num_banks, cpu); i++) device_remove_file(dev, &mce_bank_devs[i].attr); device_unregister(dev); @@ -2301,7 +2309,7 @@ static void mce_reenable_cpu(void) if (!cpuhp_tasks_frozen) cmci_reenable(); - for (i = 0; i < mca_cfg.banks; i++) { + for (i = 0; i < this_cpu_read(mce_num_banks); i++) { 
struct mce_bank *b = &this_cpu_read(mce_banks)[i]; if (b->init) @@ -2489,8 +2497,6 @@ EXPORT_SYMBOL_GPL(mcsafe_key); static int __init mcheck_late_init(void) { - pr_info("Using %d MCE banks\n", mca_cfg.banks); - if (mca_cfg.recovery) static_branch_inc(&mcsafe_key); diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h index 032d52c66616..632e2e57c1d0 100644 --- a/arch/x86/kernel/cpu/mce/internal.h +++ b/arch/x86/kernel/cpu/mce/internal.h @@ -118,7 +118,6 @@ struct mca_config { bios_cmci_threshold : 1, __reserved : 59; - u8 banks; s8 bootlog; int tolerant; int monarch_timeout; @@ -127,6 +126,7 @@ struct mca_config { }; extern struct mca_config mca_cfg; +DECLARE_PER_CPU_READ_MOSTLY(u8, mce_num_banks); struct mce_vendor_flags { /* -- 2.17.1
Powered by blists - more mailing lists