[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <4AC99B23.9090701@jp.fujitsu.com>
Date: Mon, 05 Oct 2009 16:07:15 +0900
From: Hidetoshi Seto <seto.hidetoshi@...fujitsu.com>
To: Huang Ying <ying.huang@...el.com>
CC: Ingo Molnar <mingo@...e.hu>, "H. Peter Anvin" <hpa@...or.com>,
Andi Kleen <ak@...ux.intel.com>,
"linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>
Subject: Re: [BUGFIX -v7] x86, MCE: Fix bugs and issues of MCE log ring buffer
This is the diff between Huang's original change and the result of changes
by my patch set.
I'll going to explain what I changed from the original.
> diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
> index 2d5c42a..4b5ef3c 100644
> --- a/arch/x86/include/asm/mce.h
> +++ b/arch/x86/include/asm/mce.h
> @@ -52,7 +52,7 @@
> #define MCE_INJ_NMI_BROADCAST (1 << 2) /* do NMI broadcasting */
> #define MCE_INJ_EXCEPTION (1 << 3) /* raise as exception */
>
> -/* Fields are zero when not available */
> +/* MCE log entry. Fields are zero when not available. */
> struct mce {
> __u64 status;
> __u64 misc;
> @@ -63,12 +63,12 @@ struct mce {
> __u64 time; /* wall time_t when error was detected */
> __u8 cpuvendor; /* cpu vendor as encoded in system.h */
> __u8 inject_flags; /* software inject flags */
> - __u16 pad;
> + __u16 pad;
> __u32 cpuid; /* CPUID 1 EAX */
> - __u8 cs; /* code segment */
> + __u8 cs; /* code segment */
> __u8 bank; /* machine check bank */
> __u8 cpu; /* cpu number; obsolete; use extcpu now */
> - __u8 finished; /* entry is valid */
> + __u8 finished; /* 1 if write to entry is finished & entry is valid */
> __u32 extcpu; /* linux cpu number that detected the error */
> __u32 socketid; /* CPU socket ID */
> __u32 apicid; /* CPU initial apic ID */
> @@ -76,10 +76,9 @@ struct mce {
> };
>
> /*
> - * This structure contains all data related to the MCE log. Also
> - * carries a signature to make it easier to find from external
> - * debugging tools. Each entry is only valid when its finished flag
> - * is set.
> + * This structure contains all data related to the MCE log. Also carries
> + * a signature to make it easier to find from external debugging tools.
> + * Each entry is only valid when its finished flag is set.
> */
Small clean up.
>
> #define MCE_LOG_LEN 32
> @@ -93,12 +92,18 @@ static inline int mce_log_index(int n)
> struct mce_log_cpu;
>
> struct mce_log {
> - char signature[12]; /* "MACHINECHEC2" */
> - unsigned len; /* = MCE_LOG_LEN */
> - unsigned flags;
> - unsigned recordlen; /* length of struct mce */
> - unsigned nr_mcelog_cpus;
> + char signature[12]; /* "MACHINECHEC2" */
> +
> + /* points the table of per-CPU buffers */
> struct mce_log_cpu **mcelog_cpus;
> + unsigned int nr_mcelog_cpus; /* = num_possible_cpus() */
> +
> + /* spec of per-CPU buffer */
> + unsigned int header_len; /* offset of array "entry" */
> + unsigned int nr_record; /* array size (= MCE_LOG_LEN) */
> + unsigned int record_len; /* length of struct mce */
> +
> + unsigned flags;
> };
Add a member header_len, and renamed "len" to "nr_record" which is easier
to understand. Please refer the description of patch 6/10.
>
> #define MCE_OVERFLOW 0 /* bit 0 in flags means overflow */
> diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
> index 5db3f5b..fad3daa 100644
> --- a/arch/x86/kernel/cpu/mcheck/mce.c
> +++ b/arch/x86/kernel/cpu/mcheck/mce.c
> @@ -122,54 +122,53 @@ EXPORT_PER_CPU_SYMBOL_GPL(mce_fake_banks);
> * separate MCEs from kernel messages to avoid bogus bug reports.
> */
>
> -static struct mce_log mcelog = {
> - .signature = MCE_LOG_SIGNATURE,
> - .len = MCE_LOG_LEN,
> - .recordlen = sizeof(struct mce),
> -};
> -
> struct mce_log_cpu {
> int head;
> int tail;
> - unsigned long flags;
> struct mce entry[MCE_LOG_LEN];
> };
Removed "flags" from per-CPU structure since mce_ioctl only cares global flags
in struct mce_log.
>
> DEFINE_PER_CPU(struct mce_log_cpu, mce_log_cpus);
>
> +static struct mce_log mcelog = {
> + .signature = MCE_LOG_SIGNATURE,
> + .header_len = offsetof(struct mce_log_cpu, entry),
> + .nr_record = MCE_LOG_LEN,
> + .record_len = sizeof(struct mce),
> +};
> +
> void mce_log(struct mce *mce)
> {
> struct mce_log_cpu *mcelog_cpu = &__get_cpu_var(mce_log_cpus);
> int head, ihead, tail, next;
>
> + /* mce->finished must be set to 0 before written to buffer */
> mce->finished = 0;
> - /*
> - * mce->finished must be set to 0 before written to ring
> - * buffer
> - */
> smp_wmb();
> +
> do {
> head = mcelog_cpu->head;
> tail = mcelog_cpu->tail;
> ihead = mce_log_index(head);
> +
> /*
> * When the buffer fills up discard new entries.
> * Assume that the earlier errors are the more
> * interesting.
> */
> if (ihead == mce_log_index(tail) && head != tail) {
> - set_bit(MCE_OVERFLOW, &mcelog_cpu->flags);
> + set_bit(MCE_OVERFLOW, (unsigned long *)&mcelog.flags);
Use global flags which is cared by mce_ioctl.
> return;
> }
> next = head == MCE_LOG_LIMIT ? 0 : head + 1;
> } while (cmpxchg_local(&mcelog_cpu->head, head, next) != head);
> +
> memcpy(mcelog_cpu->entry + ihead, mce, sizeof(struct mce));
> - /*
> - * ".finished" of MCE record in ring buffer must be set after
> - * copy
> - */
> +
> + /* ".finished" of MCE record in buffer must be set after copy */
> smp_wmb();
> mcelog_cpu->entry[ihead].finished = 1;
> +
> /* bit 0 of notify_user should be set after finished be set */
> smp_wmb();
> mce->finished = 1;
Changes from here to ....
> @@ -1195,19 +1194,40 @@ int mce_notify_irq(void)
> }
> EXPORT_SYMBOL_GPL(mce_notify_irq);
>
> -static int mce_banks_init(void)
> +static __cpuinit int mce_banks_init(void)
> {
> int i;
>
> mce_banks = kzalloc(banks * sizeof(struct mce_bank), GFP_KERNEL);
> if (!mce_banks)
> return -ENOMEM;
> +
> for (i = 0; i < banks; i++) {
> struct mce_bank *b = &mce_banks[i];
>
> b->ctl = -1ULL;
> b->init = 1;
> }
> +
> + return 0;
> +}
> +
> +/*
> + * Initialize MCE per-CPU log buffer
> + */
> +static __cpuinit int mce_log_init(void)
> +{
> + int cpu;
> +
> + mcelog.nr_mcelog_cpus = num_possible_cpus();
> + mcelog.mcelog_cpus = kzalloc(sizeof(void *) * num_possible_cpus(),
> + GFP_KERNEL);
> + if (!mcelog.mcelog_cpus)
> + return -ENOMEM;
> +
> + for_each_possible_cpu(cpu)
> + mcelog.mcelog_cpus[cpu] = &per_cpu(mce_log_cpus, cpu);
> +
> return 0;
> }
>
> @@ -1234,6 +1254,7 @@ static int __cpuinit mce_cap_init(void)
> /* Don't support asymmetric configurations today */
> WARN_ON(banks != 0 && b != banks);
> banks = b;
> +
> if (!mce_banks) {
> int err = mce_banks_init();
>
> @@ -1241,6 +1262,13 @@ static int __cpuinit mce_cap_init(void)
> return err;
> }
>
> + if (!mcelog.mcelog_cpus) {
> + int err = mce_log_init();
> +
> + if (err)
> + return err;
> + }
> +
> /* Use accurate RIP reporting if available. */
> if ((cap & MCG_EXT_P) && MCG_EXT_CNT(cap) >= 9)
> rip_msr = MSR_IA32_MCG_EIP;
> @@ -1251,22 +1279,6 @@ static int __cpuinit mce_cap_init(void)
> return 0;
> }
>
> -/*
> - * Initialize MCE per-CPU log buffer
> - */
> -static __cpuinit void mce_log_init(void)
> -{
> - int cpu;
> -
> - if (mcelog.mcelog_cpus)
> - return;
> - mcelog.nr_mcelog_cpus = num_possible_cpus();
> - mcelog.mcelog_cpus = kzalloc(sizeof(void *) * num_possible_cpus(),
> - GFP_KERNEL);
> - for_each_possible_cpu(cpu)
> - mcelog.mcelog_cpus[cpu] = &per_cpu(mce_log_cpus, cpu);
> -}
> -
> static void mce_init(void)
> {
> mce_banks_t all_banks;
> @@ -1437,7 +1449,6 @@ void __cpuinit mcheck_init(struct cpuinfo_x86 *c)
> mce_disabled = 1;
> return;
> }
> - mce_log_init();
>
> machine_check_vector = do_machine_check;
>
... here are what done in patch 10/10.
> @@ -1486,15 +1497,14 @@ static int mce_release(struct inode *inode, struct file *file)
> return 0;
> }
>
> -static ssize_t mce_read_cpu(struct mce_log_cpu *mcelog_cpu,
> - char __user *inubuf, size_t usize)
> +static ssize_t mce_read_cpu(int cpu, char __user *inubuf, size_t usize)
> {
> + struct mce_log_cpu *mcelog_cpu = &per_cpu(mce_log_cpus, cpu);
Changed 1st arg to cpu number.
> char __user *ubuf = inubuf;
> int head, tail, pos, i, err = 0;
>
> head = mcelog_cpu->head;
> tail = mcelog_cpu->tail;
> -
> if (head == tail)
> return 0;
>
> @@ -1503,13 +1513,14 @@ static ssize_t mce_read_cpu(struct mce_log_cpu *mcelog_cpu,
> i = mce_log_index(pos);
> if (!mcelog_cpu->entry[i].finished) {
> int timeout = WRITER_TIMEOUT_NS;
> +
> while (!mcelog_cpu->entry[i].finished) {
> if (timeout-- <= 0) {
> memset(mcelog_cpu->entry + i, 0,
> sizeof(struct mce));
> head = mcelog_cpu->head;
> printk(KERN_WARNING "mcelog: timeout "
> - "waiting for writer to finish!\n");
> + "waiting for writer to finish!\n");
> goto timeout;
> }
> ndelay(1);
> @@ -1538,11 +1549,6 @@ timeout:
> return err ? -EFAULT : ubuf - inubuf;
> }
>
> -static int mce_empty_cpu(struct mce_log_cpu *mcelog_cpu)
> -{
> - return mcelog_cpu->head == mcelog_cpu->tail;
> -}
> -
> static int mce_empty(void)
> {
> int cpu;
> @@ -1550,33 +1556,34 @@ static int mce_empty(void)
>
> for_each_possible_cpu(cpu) {
> mcelog_cpu = &per_cpu(mce_log_cpus, cpu);
> - if (!mce_empty_cpu(mcelog_cpu))
> + if (mcelog_cpu->head != mcelog_cpu->tail)
Inlined. Or it would be better to have static inlines in header file.
> return 0;
> }
> return 1;
> }
>
> +static DEFINE_MUTEX(mce_read_mutex);
> +
> static ssize_t mce_read(struct file *filp, char __user *inubuf, size_t usize,
> loff_t *off)
> {
> char __user *ubuf = inubuf;
> - struct mce_log_cpu *mcelog_cpu;
> - int cpu, new_mce, err = 0;
> - static DEFINE_MUTEX(mce_read_mutex);
> + int cpu, err = 0;
> +
> + /* Only supports full reads right now */
> + if (*off != 0 || usize < sizeof(struct mce))
> + return -EINVAL;
Picked up what implicitly dropped.
>
> mutex_lock(&mce_read_mutex);
> - do {
> - new_mce = 0;
> +
> + while (!mce_empty()) {
> for_each_possible_cpu(cpu) {
> if (usize < sizeof(struct mce))
> goto out;
> - mcelog_cpu = &per_cpu(mce_log_cpus, cpu);
> - err = mce_read_cpu(mcelog_cpu, ubuf,
> - sizeof(struct mce));
> + err = mce_read_cpu(cpu, ubuf, sizeof(struct mce));
> if (err > 0) {
> ubuf += sizeof(struct mce);
> usize -= sizeof(struct mce);
> - new_mce = 1;
> err = 0;
> } else if (err < 0)
> goto out;
> @@ -1586,9 +1593,10 @@ static ssize_t mce_read(struct file *filp, char __user *inubuf, size_t usize,
> cond_resched();
> mutex_lock(&mce_read_mutex);
> }
> - } while (new_mce || !mce_empty());
> + }
I could not catch the intent of "new_mce," so replaced do-while by
simple while statement.
> out:
> mutex_unlock(&mce_read_mutex);
> +
> return err ? err : ubuf - inubuf;
> }
>
That's all.
Thanks,
H.Seto
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists