[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <87y2lth4qa.fsf@nanos.tec.linutronix.de>
Date: Tue, 01 Sep 2020 17:54:53 +0200
From: Thomas Gleixner <tglx@...utronix.de>
To: Julien Desfossez <jdesfossez@...italocean.com>,
Peter Zijlstra <peterz@...radead.org>,
Vineeth Pillai <viremana@...ux.microsoft.com>,
Joel Fernandes <joelaf@...gle.com>,
Tim Chen <tim.c.chen@...ux.intel.com>,
Aaron Lu <aaron.lwe@...il.com>,
Aubrey Li <aubrey.intel@...il.com>,
Dhaval Giani <dhaval.giani@...cle.com>,
Chris Hyser <chris.hyser@...cle.com>,
Nishanth Aravamudan <naravamudan@...italocean.com>
Cc: "Joel Fernandes \(Google\)" <joel@...lfernandes.org>,
mingo@...nel.org, pjt@...gle.com, torvalds@...ux-foundation.org,
linux-kernel@...r.kernel.org, fweisbec@...il.com,
keescook@...omium.org, kerrnel@...gle.com,
Phil Auld <pauld@...hat.com>,
Valentin Schneider <valentin.schneider@....com>,
Mel Gorman <mgorman@...hsingularity.net>,
Pawan Gupta <pawan.kumar.gupta@...ux.intel.com>,
Paolo Bonzini <pbonzini@...hat.com>, vineeth@...byteword.org,
Chen Yu <yu.c.chen@...el.com>,
Christian Brauner <christian.brauner@...ntu.com>,
Agata Gruza <agata.gruza@...el.com>,
Antonio Gomez Iglesias <antonio.gomez.iglesias@...el.com>,
graf@...zon.com, konrad.wilk@...cle.com, dfaggioli@...e.com,
rostedt@...dmis.org, derkling@...gle.com, benbjiang@...cent.com,
Julien Desfossez <jdesfossez@...italocean.com>,
Aubrey Li <aubrey.li@...ux.intel.com>,
Tim Chen <tim.c.chen@...el.com>,
"Paul E . McKenney" <paulmck@...nel.org>
Subject: Re: [RFC PATCH v7 17/23] kernel/entry: Add support for core-wide protection of kernel-mode
On Fri, Aug 28 2020 at 15:51, Julien Desfossez wrote:
> @@ -112,59 +113,84 @@ static __always_inline void exit_to_user_mode(void)
> /* Workaround to allow gradual conversion of architecture code */
> void __weak arch_do_signal(struct pt_regs *regs) { }
>
> -static unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
> - unsigned long ti_work)
Can the rework of that code please be split out into a seperate patch
and then adding that unsafe muck on top?
> +static inline bool exit_to_user_mode_work_pending(unsigned long ti_work)
> {
> - /*
> - * Before returning to user space ensure that all pending work
> - * items have been completed.
> - */
> - while (ti_work & EXIT_TO_USER_MODE_WORK) {
> + return (ti_work & EXIT_TO_USER_MODE_WORK);
> +}
>
> - local_irq_enable_exit_to_user(ti_work);
> +static inline void exit_to_user_mode_work(struct pt_regs *regs,
> + unsigned long ti_work)
> +{
>
> - if (ti_work & _TIF_NEED_RESCHED)
> - schedule();
> + local_irq_enable_exit_to_user(ti_work);
>
> - if (ti_work & _TIF_UPROBE)
> - uprobe_notify_resume(regs);
> + if (ti_work & _TIF_NEED_RESCHED)
> + schedule();
>
> - if (ti_work & _TIF_PATCH_PENDING)
> - klp_update_patch_state(current);
> + if (ti_work & _TIF_UPROBE)
> + uprobe_notify_resume(regs);
>
> - if (ti_work & _TIF_SIGPENDING)
> - arch_do_signal(regs);
> + if (ti_work & _TIF_PATCH_PENDING)
> + klp_update_patch_state(current);
>
> - if (ti_work & _TIF_NOTIFY_RESUME) {
> - clear_thread_flag(TIF_NOTIFY_RESUME);
> - tracehook_notify_resume(regs);
> - rseq_handle_notify_resume(NULL, regs);
> - }
> + if (ti_work & _TIF_SIGPENDING)
> + arch_do_signal(regs);
> +
> + if (ti_work & _TIF_NOTIFY_RESUME) {
> + clear_thread_flag(TIF_NOTIFY_RESUME);
> + tracehook_notify_resume(regs);
> + rseq_handle_notify_resume(NULL, regs);
> + }
> +
> + /* Architecture specific TIF work */
> + arch_exit_to_user_mode_work(regs, ti_work);
> +
> + local_irq_disable_exit_to_user();
> +}
>
> - /* Architecture specific TIF work */
> - arch_exit_to_user_mode_work(regs, ti_work);
> +static unsigned long exit_to_user_mode_loop(struct pt_regs *regs)
> +{
> + unsigned long ti_work = READ_ONCE(current_thread_info()->flags);
> +
> + /*
> + * Before returning to user space ensure that all pending work
> + * items have been completed.
> + */
> + while (1) {
> + /* Both interrupts and preemption could be enabled here. */
What? Preemption is enabled here, but interrupts are disabled.
> + if (exit_to_user_mode_work_pending(ti_work))
> + exit_to_user_mode_work(regs, ti_work);
> +
> + /* Interrupts may be reenabled with preemption disabled. */
> + sched_core_unsafe_exit_wait(EXIT_TO_USER_MODE_WORK);
> +
> /*
> - * Disable interrupts and reevaluate the work flags as they
> - * might have changed while interrupts and preemption was
> - * enabled above.
> + * Reevaluate the work flags as they might have changed
> + * while interrupts and preemption were enabled.
What enables preemption and interrupts? Can you pretty please write
comments which explain what's going on.
> */
> - local_irq_disable_exit_to_user();
> ti_work = READ_ONCE(current_thread_info()->flags);
> +
> + /*
> + * We may be switching out to another task in kernel mode. That
> + * process is responsible for exiting the "unsafe" kernel mode
> + * when it returns to user or guest.
> + */
> + if (exit_to_user_mode_work_pending(ti_work))
> + sched_core_unsafe_enter();
> + else
> + break;
> }
>
> - /* Return the latest work state for arch_exit_to_user_mode() */
> - return ti_work;
> + return ti_work;
> }
>
> static void exit_to_user_mode_prepare(struct pt_regs *regs)
> {
> - unsigned long ti_work = READ_ONCE(current_thread_info()->flags);
> + unsigned long ti_work;
>
> lockdep_assert_irqs_disabled();
>
> - if (unlikely(ti_work & EXIT_TO_USER_MODE_WORK))
> - ti_work = exit_to_user_mode_loop(regs, ti_work);
> + ti_work = exit_to_user_mode_loop(regs);
Why has the above loop to be invoked unconditionally even when that core
scheduling muck is disabled? Just to make all return to user paths more
expensive unconditionally, right?
Thanks,
tglx
Powered by blists - more mailing lists