linux-kernel - Re: [PATCH RT] x86/mce: Defer mce wakeups to threads for PREEMPT_RT

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <1365704877.9609.56.camel@gandalf.local.home>
Date:	Thu, 11 Apr 2013 14:27:57 -0400
From:	Steven Rostedt <rostedt@...dmis.org>
To:	LKML <linux-kernel@...r.kernel.org>
Cc:	RT <linux-rt-users@...r.kernel.org>,
	Thomas Gleixner <tglx@...utronix.de>,
	Clark Williams <clark@...hat.com>,
	John Kacur <jkacur@...hat.com>,
	Tony Luck <tony.luck@...el.com>,
	Borislav Petkov <bp@...en8.de>,
	Mauro Carvalho Chehab <mchehab@...hat.com>,
	Ingo Molnar <mingo@...nel.org>,
	"H. Peter Anvin" <hpa@...ux.intel.com>
Subject: Re: [PATCH RT] x86/mce: Defer mce wakeups to threads for PREEMPT_RT

On Thu, 2013-04-11 at 14:23 -0400, Steven Rostedt wrote:

> diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
> index e8d8ad0..060e473 100644
> --- a/arch/x86/kernel/cpu/mcheck/mce.c
> +++ b/arch/x86/kernel/cpu/mcheck/mce.c
> @@ -18,6 +18,7 @@
>  #include <linux/rcupdate.h>
>  #include <linux/kobject.h>
>  #include <linux/uaccess.h>
> +#include <linux/kthread.h>
>  #include <linux/kdebug.h>
>  #include <linux/kernel.h>
>  #include <linux/percpu.h>
> @@ -1308,6 +1309,61 @@ static void mce_do_trigger(struct work_struct *work)
>  
>  static DECLARE_WORK(mce_trigger_work, mce_do_trigger);
>  
> +static void __mce_notify_work(void)
> +{
> +	/* Not more than two messages every minute */
> +	static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2);
> +
> +	/* wake processes polling /dev/mcelog */
> +	wake_up_interruptible(&mce_chrdev_wait);
> +
> +	/*
> +	 * There is no risk of missing notifications because
> +	 * work_pending is always cleared before the function is
> +	 * executed.
> +	 */
> +	if (mce_helper[0] && !work_pending(&mce_trigger_work))
> +		schedule_work(&mce_trigger_work);
> +
> +	if (__ratelimit(&ratelimit))
> +		pr_info(HW_ERR "Machine check events logged\n");
> +}
> +
> +#ifdef CONFIG_PREEMPT_RT_FULL
> +struct task_struct *mce_notify_helper;
> +
> +static int mce_notify_helper_thread(void *unused)
> +{
> +	while (!kthread_should_stop()) {
> +		__mce_notify_work();
> +		set_current_state(TASK_INTERRUPTIBLE);
> +		schedule();
> +	}
> +	return 0;
> +}
> +
> +static int mce_notify_work_init(void)
> +{
> +	mce_notify_helper = kthread_create(mce_notify_helper_thread, NULL,
> +					   "mce-notify");
> +	if (!mce_notify_helper)
> +		return -ENOMEM;
> +
> +	return 0;
> +}
> +
> +static void mce_notify_work()
> +{
> +	wake_up_process(mce_notify_helper);
> +}
> +#else
> +static void mce_notify_work(void)
> +{
> +	__mce_notify_work();
> +}
> +static inline int mce_notify_work_init(void) { return 0; }
> +#endif
> +
>  /*
>   * Notify the user(s) about new machine check events.
>   * Can be called from interrupt context, but not from machine check/NMI
> @@ -1315,24 +1371,8 @@ static DECLARE_WORK(mce_trigger_work, mce_do_trigger);
>   */
>  int mce_notify_irq(void)
>  {
> -	/* Not more than two messages every minute */
> -	static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2);
> -
>  	if (test_and_clear_bit(0, &mce_need_notify)) {
> -		/* wake processes polling /dev/mcelog */
> -		wake_up_interruptible(&mce_chrdev_wait);
> -
> -		/*
> -		 * There is no risk of missing notifications because
> -		 * work_pending is always cleared before the function is
> -		 * executed.
> -		 */
> -		if (mce_helper[0] && !work_pending(&mce_trigger_work))
> -			schedule_work(&mce_trigger_work);
> -
> -		if (__ratelimit(&ratelimit))
> -			pr_info(HW_ERR "Machine check events logged\n");
> -
> +		mce_notify_work();
>  		return 1;
>  	}
>  	return 0;
> @@ -2375,6 +2415,8 @@ static __init int mcheck_init_device(void)
>  	/* register character device /dev/mcelog */
>  	misc_register(&mce_chrdev_device);
>  
> +	err = mce_notify_work_init();
> +
>  	return err;
>  }


Ignore the below, I didn't realize I had other changes when I did my git
diff to make this patch :-/

-- Steve

>  device_initcall_sync(mcheck_init_device);
> diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
> index 33e5d14..120c790 100644
> --- a/arch/x86/kernel/process_32.c
> +++ b/arch/x86/kernel/process_32.c
> @@ -198,34 +198,6 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
>  }
>  EXPORT_SYMBOL_GPL(start_thread);
>  
> -#ifdef CONFIG_PREEMPT_RT_FULL
> -static void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p)
> -{
> -	int i;
> -
> -	/*
> -	 * Clear @prev's kmap_atomic mappings
> -	 */
> -	for (i = 0; i < prev_p->kmap_idx; i++) {
> -		int idx = i + KM_TYPE_NR * smp_processor_id();
> -		pte_t *ptep = kmap_pte - idx;
> -
> -		kpte_clear_flush(ptep, __fix_to_virt(FIX_KMAP_BEGIN + idx));
> -	}
> -	/*
> -	 * Restore @next_p's kmap_atomic mappings
> -	 */
> -	for (i = 0; i < next_p->kmap_idx; i++) {
> -		int idx = i + KM_TYPE_NR * smp_processor_id();
> -
> -		set_pte(kmap_pte - idx, next_p->kmap_pte[i]);
> -	}
> -}
> -#else
> -static inline void
> -switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) { }
> -#endif
> -
>  
>  /*
>   *	switch_to(x,y) should switch tasks from x to y.
> @@ -305,7 +277,40 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
>  		     task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT))
>  		__switch_to_xtra(prev_p, next_p, tss);
>  
> -	switch_kmaps(prev_p, next_p);
> +#ifdef CONFIG_PREEMPT_RT_FULL
> +	/*
> +	 * Save @prev's kmap_atomic stack
> +	 */
> +	prev_p->kmap_idx = __this_cpu_read(__kmap_atomic_idx);
> +	if (unlikely(prev_p->kmap_idx)) {
> +		int i;
> +
> +		for (i = 0; i < prev_p->kmap_idx; i++) {
> +			int idx = i + KM_TYPE_NR * smp_processor_id();
> +
> +			pte_t *ptep = kmap_pte - idx;
> +			prev_p->kmap_pte[i] = *ptep;
> +			kpte_clear_flush(ptep, __fix_to_virt(FIX_KMAP_BEGIN + idx));
> +		}
> +
> +		__this_cpu_write(__kmap_atomic_idx, 0);
> +	}
> +
> +	/*
> +	 * Restore @next_p's kmap_atomic stack
> +	 */
> +	if (unlikely(next_p->kmap_idx)) {
> +		int i;
> +
> +		__this_cpu_write(__kmap_atomic_idx, next_p->kmap_idx);
> +
> +		for (i = 0; i < next_p->kmap_idx; i++) {
> +			int idx = i + KM_TYPE_NR * smp_processor_id();
> +
> +			set_pte(kmap_pte - idx, next_p->kmap_pte[i]);
> +		}
> +	}
> +#endif
>  
>  	/*
>  	 * Leave lazy mode, flushing any hypercalls made here.
> 


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/