Message-ID: <20211118163405.adczki7ibk22bw7e@linutronix.de>
Date: Thu, 18 Nov 2021 17:34:05 +0100
From: Sebastian Andrzej Siewior <bigeasy@...utronix.de>
To: Thomas Gleixner <tglx@...utronix.de>
Cc: LKML <linux-kernel@...r.kernel.org>,
linux-rt-users@...r.kernel.org,
Steven Rostedt <rostedt@...dmis.org>
Subject: [ANNOUNCE] v5.16-rc1-rt2
Dear RT folks!

I'm pleased to announce the v5.16-rc1-rt2 patch set.

Changes since v5.16-rc1-rt1:

- Redo the delayed deallocation of the task-stack.
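  The idea, in short: with CONFIG_VMAP_STACK the task stack must not be
  vfree()d from the (possibly atomic) context in which the final
  put_task_stack() can run, so the free is pushed out to an RCU callback.
  A minimal sketch of the mechanism -- the helper names here are made up
  and the details differ from the actual kernel/fork.c change:

#include <linux/kernel.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/sched/task_stack.h>
#include <linux/vmalloc.h>

/*
 * The stack is no longer used at this point, so its own memory can
 * carry the rcu_head; no extra allocation is needed on the free path.
 */
struct vm_stack {
	struct rcu_head rcu;
	struct vm_struct *stack_vm_area;
};

static void thread_stack_free_rcu(struct rcu_head *rh)
{
	struct vm_stack *vm_stack = container_of(rh, struct vm_stack, rcu);

	/* tsk->stack was the vmalloc'ed base address, so this frees it all */
	vfree(vm_stack);
}

static void thread_stack_delayed_free(struct task_struct *tsk)
{
	struct vm_stack *vm_stack = tsk->stack;

	vm_stack->stack_vm_area = tsk->stack_vm_area;
	call_rcu(&vm_stack->rcu, thread_stack_free_rcu);
}
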
Known issues

- netconsole triggers WARN.

- The "Memory controller" (CONFIG_MEMCG) has been disabled.

- Valentin Schneider reported a few splats on ARM64, see
  https://lkml.kernel.org/r/20210810134127.1394269-1-valentin.schneider@arm.com

The delta patch against v5.16-rc1-rt1 is appended below and can be found here:

    https://cdn.kernel.org/pub/linux/kernel/projects/rt/5.16/incr/patch-5.16-rc1-rt1-rt2.patch.xz

You can get this release via the git tree at:

    git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-rt-devel.git v5.16-rc1-rt2

The RT patch against v5.16-rc1 can be found here:

    https://cdn.kernel.org/pub/linux/kernel/projects/rt/5.16/older/patch-5.16-rc1-rt2.patch.xz

The split quilt queue is available at:

    https://cdn.kernel.org/pub/linux/kernel/projects/rt/5.16/older/patches-5.16-rc1-rt2.tar.xz

Sebastian
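
(For orientation: much of the delta below re-adds the PREEMPT_LAZY plumbing
per architecture. Condensed, and with no claim to match any single
architecture exactly, the recurring check is that a task is preempted
"lazily" only when the regular preempt count, the per-task lazy preempt
count and TIF_NEED_RESCHED_LAZY all allow it:)

static __always_inline bool __preempt_count_dec_and_test(void)
{
	if (____preempt_count_dec_and_test())
		return true;
#ifdef CONFIG_PREEMPT_LAZY
	/* Regular preemption still disabled? Then no lazy preempt either. */
	if (preempt_count())
		return false;
	/* Lazy preemption disabled for this task? */
	if (current_thread_info()->preempt_lazy_count)
		return false;
	return test_thread_flag(TIF_NEED_RESCHED_LAZY);
#else
	return false;
#endif
}
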
diff --git a/arch/alpha/include/asm/spinlock_types.h b/arch/alpha/include/asm/spinlock_types.h
index 1d5716bc060be..2526fd3be5fd7 100644
--- a/arch/alpha/include/asm/spinlock_types.h
+++ b/arch/alpha/include/asm/spinlock_types.h
@@ -2,7 +2,7 @@
#ifndef _ALPHA_SPINLOCK_TYPES_H
#define _ALPHA_SPINLOCK_TYPES_H
-#ifndef __LINUX_SPINLOCK_TYPES_H
+#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
# error "please don't include this file directly"
#endif
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index f0f9e8bec83ac..8535984ea7440 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -33,6 +33,7 @@ config ARM
select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT if CPU_V7
select ARCH_SUPPORTS_ATOMIC_RMW
select ARCH_SUPPORTS_HUGETLBFS if ARM_LPAE
+ select ARCH_SUPPORTS_RT if HAVE_POSIX_CPU_TIMERS_TASK_WORK
select ARCH_USE_BUILTIN_BSWAP
select ARCH_USE_CMPXCHG_LOCKREF
select ARCH_USE_MEMTEST
@@ -68,7 +69,7 @@ config ARM
select HARDIRQS_SW_RESEND
select HAVE_ARCH_AUDITSYSCALL if AEABI && !OABI_COMPAT
select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6
- select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU
+ select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU && !PREEMPT_RT
select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 && MMU
select HAVE_ARCH_KASAN if MMU && !XIP_KERNEL
select HAVE_ARCH_MMAP_RND_BITS if MMU
@@ -109,6 +110,7 @@ config ARM
select HAVE_PERF_EVENTS
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
+ select HAVE_PREEMPT_LAZY
select MMU_GATHER_RCU_TABLE_FREE if SMP && ARM_LPAE
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_RSEQ
@@ -124,6 +126,7 @@ config ARM
select OLD_SIGSUSPEND3
select PCI_SYSCALL if PCI
select PERF_USE_VMALLOC
+ select HAVE_POSIX_CPU_TIMERS_TASK_WORK if !KVM
select RTC_LIB
select SYS_SUPPORTS_APM_EMULATION
select THREAD_INFO_IN_TASK if CURRENT_POINTER_IN_TPIDRURO
diff --git a/arch/arm/include/asm/spinlock_types.h b/arch/arm/include/asm/spinlock_types.h
index 5976958647fe1..0c14b36ef1013 100644
--- a/arch/arm/include/asm/spinlock_types.h
+++ b/arch/arm/include/asm/spinlock_types.h
@@ -2,7 +2,7 @@
#ifndef __ASM_SPINLOCK_TYPES_H
#define __ASM_SPINLOCK_TYPES_H
-#ifndef __LINUX_SPINLOCK_TYPES_H
+#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
# error "please don't include this file directly"
#endif
diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
index 164e15f26485d..666da94ed9b77 100644
--- a/arch/arm/include/asm/thread_info.h
+++ b/arch/arm/include/asm/thread_info.h
@@ -54,6 +54,7 @@ struct cpu_context_save {
struct thread_info {
unsigned long flags; /* low level flags */
int preempt_count; /* 0 => preemptable, <0 => bug */
+ int preempt_lazy_count; /* 0 => preemptable, <0 => bug */
#ifndef CONFIG_THREAD_INFO_IN_TASK
struct task_struct *task; /* main task structure */
#endif
@@ -152,6 +153,7 @@ extern int vfp_restore_user_hwstate(struct user_vfp *,
#define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */
#define TIF_SECCOMP 7 /* seccomp syscall filtering active */
#define TIF_NOTIFY_SIGNAL 8 /* signal notifications exist */
+#define TIF_NEED_RESCHED_LAZY 9
#define TIF_USING_IWMMXT 17
#define TIF_MEMDIE 18 /* is terminating due to OOM killer */
@@ -166,6 +168,7 @@ extern int vfp_restore_user_hwstate(struct user_vfp *,
#define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT)
#define _TIF_SECCOMP (1 << TIF_SECCOMP)
#define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL)
+#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY)
#define _TIF_USING_IWMMXT (1 << TIF_USING_IWMMXT)
/* Checks for any syscall work in entry-common.S */
@@ -175,7 +178,8 @@ extern int vfp_restore_user_hwstate(struct user_vfp *,
/*
* Change these and you break ASM code in entry-common.S
*/
-#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
+#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | \
+ _TIF_SIGPENDING | \
_TIF_NOTIFY_RESUME | _TIF_UPROBE | \
_TIF_NOTIFY_SIGNAL)
diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
index 645845e4982a6..73e321c6d1525 100644
--- a/arch/arm/kernel/asm-offsets.c
+++ b/arch/arm/kernel/asm-offsets.c
@@ -43,6 +43,7 @@ int main(void)
BLANK();
DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count));
+ DEFINE(TI_PREEMPT_LAZY, offsetof(struct thread_info, preempt_lazy_count));
#ifndef CONFIG_THREAD_INFO_IN_TASK
DEFINE(TI_TASK, offsetof(struct thread_info, task));
#endif
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index deff286eb5ea0..277238281cca8 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -203,11 +203,18 @@ ENDPROC(__dabt_svc)
#ifdef CONFIG_PREEMPTION
ldr r8, [tsk, #TI_PREEMPT] @ get preempt count
- ldr r0, [tsk, #TI_FLAGS] @ get flags
teq r8, #0 @ if preempt count != 0
+ bne 1f @ return from exception
+ ldr r0, [tsk, #TI_FLAGS] @ get flags
+ tst r0, #_TIF_NEED_RESCHED @ if NEED_RESCHED is set
+ blne svc_preempt @ preempt!
+
+ ldr r8, [tsk, #TI_PREEMPT_LAZY] @ get preempt lazy count
+ teq r8, #0 @ if preempt lazy count != 0
movne r0, #0 @ force flags to 0
- tst r0, #_TIF_NEED_RESCHED
+ tst r0, #_TIF_NEED_RESCHED_LAZY
blne svc_preempt
+1:
#endif
svc_exit r5, irq = 1 @ return from exception
@@ -222,8 +229,14 @@ ENDPROC(__irq_svc)
1: bl preempt_schedule_irq @ irq en/disable is done inside
ldr r0, [tsk, #TI_FLAGS] @ get new tasks TI_FLAGS
tst r0, #_TIF_NEED_RESCHED
+ bne 1b
+ tst r0, #_TIF_NEED_RESCHED_LAZY
reteq r8 @ go again
- b 1b
+ ldr r0, [tsk, #TI_PREEMPT_LAZY] @ get preempt lazy count
+ teq r0, #0 @ if preempt lazy count != 0
+ beq 1b
+ ret r8 @ go again
+
#endif
__und_fault:
diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c
index a41e27ace391f..1e29cec7716f0 100644
--- a/arch/arm/kernel/signal.c
+++ b/arch/arm/kernel/signal.c
@@ -607,7 +607,8 @@ do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall)
*/
trace_hardirqs_off();
do {
- if (likely(thread_flags & _TIF_NEED_RESCHED)) {
+ if (likely(thread_flags & (_TIF_NEED_RESCHED |
+ _TIF_NEED_RESCHED_LAZY))) {
schedule();
} else {
if (unlikely(!user_mode(regs)))
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index cde5b6d8bac5d..bcf3222301c5e 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -676,9 +676,7 @@ static void do_handle_IPI(int ipinr)
break;
case IPI_CPU_BACKTRACE:
- printk_deferred_enter();
nmi_cpu_backtrace(get_irq_regs());
- printk_deferred_exit();
break;
default:
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index bc8779d54a640..12dba4284b212 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -407,6 +407,9 @@ do_translation_fault(unsigned long addr, unsigned int fsr,
if (addr < TASK_SIZE)
return do_page_fault(addr, fsr, regs);
+ if (interrupts_enabled(regs))
+ local_irq_enable();
+
if (user_mode(regs))
goto bad_area;
@@ -477,6 +480,9 @@ do_translation_fault(unsigned long addr, unsigned int fsr,
static int
do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
+ if (interrupts_enabled(regs))
+ local_irq_enable();
+
do_bad_area(addr, fsr, regs);
return 0;
}
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index c4207cf9bb17f..6bd4acc2be02f 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -89,6 +89,7 @@ config ARM64
select ARCH_SUPPORTS_ATOMIC_RMW
select ARCH_SUPPORTS_INT128 if CC_HAS_INT128
select ARCH_SUPPORTS_NUMA_BALANCING
+ select ARCH_SUPPORTS_RT if HAVE_POSIX_CPU_TIMERS_TASK_WORK
select ARCH_WANT_COMPAT_IPC_PARSE_VERSION if COMPAT
select ARCH_WANT_DEFAULT_BPF_JIT
select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
@@ -192,6 +193,7 @@ config ARM64
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
select HAVE_REGS_AND_STACK_ACCESS_API
+ select HAVE_PREEMPT_LAZY
select HAVE_POSIX_CPU_TIMERS_TASK_WORK
select HAVE_FUNCTION_ARG_ACCESS_API
select HAVE_FUTEX_CMPXCHG if FUTEX
@@ -214,6 +216,7 @@ config ARM64
select PCI_DOMAINS_GENERIC if PCI
select PCI_ECAM if (ACPI && PCI)
select PCI_SYSCALL if PCI
+ select HAVE_POSIX_CPU_TIMERS_TASK_WORK if !KVM
select POWER_RESET
select POWER_SUPPLY
select SPARSE_IRQ
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index c4ba047a82d26..7c83a6655d1cc 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -1001,7 +1001,7 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
*/
static inline bool arch_faults_on_old_pte(void)
{
- WARN_ON(preemptible());
+ WARN_ON(is_migratable());
return !cpu_has_hw_af();
}
diff --git a/arch/arm64/include/asm/preempt.h b/arch/arm64/include/asm/preempt.h
index e83f0982b99c1..2545c17281e1c 100644
--- a/arch/arm64/include/asm/preempt.h
+++ b/arch/arm64/include/asm/preempt.h
@@ -70,13 +70,36 @@ static inline bool __preempt_count_dec_and_test(void)
* interrupt occurring between the non-atomic READ_ONCE/WRITE_ONCE
* pair.
*/
- return !pc || !READ_ONCE(ti->preempt_count);
+ if (!pc || !READ_ONCE(ti->preempt_count))
+ return true;
+#ifdef CONFIG_PREEMPT_LAZY
+ if ((pc & ~PREEMPT_NEED_RESCHED))
+ return false;
+ if (current_thread_info()->preempt_lazy_count)
+ return false;
+ return test_thread_flag(TIF_NEED_RESCHED_LAZY);
+#else
+ return false;
+#endif
}
static inline bool should_resched(int preempt_offset)
{
+#ifdef CONFIG_PREEMPT_LAZY
+ u64 pc = READ_ONCE(current_thread_info()->preempt_count);
+ if (pc == preempt_offset)
+ return true;
+
+ if ((pc & ~PREEMPT_NEED_RESCHED) != preempt_offset)
+ return false;
+
+ if (current_thread_info()->preempt_lazy_count)
+ return false;
+ return test_thread_flag(TIF_NEED_RESCHED_LAZY);
+#else
u64 pc = READ_ONCE(current_thread_info()->preempt_count);
return pc == preempt_offset;
+#endif
}
#ifdef CONFIG_PREEMPTION
diff --git a/arch/arm64/include/asm/signal.h b/arch/arm64/include/asm/signal.h
index ef449f5f4ba80..5e535c3e49260 100644
--- a/arch/arm64/include/asm/signal.h
+++ b/arch/arm64/include/asm/signal.h
@@ -22,4 +22,8 @@ static inline void __user *arch_untagged_si_addr(void __user *addr,
}
#define arch_untagged_si_addr arch_untagged_si_addr
+#if defined(CONFIG_PREEMPT_RT)
+#define ARCH_RT_DELAYS_SIGNAL_SEND
+#endif
+
#endif
diff --git a/arch/arm64/include/asm/spinlock_types.h b/arch/arm64/include/asm/spinlock_types.h
index 18782f0c47212..11ab1c0776977 100644
--- a/arch/arm64/include/asm/spinlock_types.h
+++ b/arch/arm64/include/asm/spinlock_types.h
@@ -5,7 +5,7 @@
#ifndef __ASM_SPINLOCK_TYPES_H
#define __ASM_SPINLOCK_TYPES_H
-#if !defined(__LINUX_SPINLOCK_TYPES_H) && !defined(__ASM_SPINLOCK_H)
+#if !defined(__LINUX_SPINLOCK_TYPES_RAW_H) && !defined(__ASM_SPINLOCK_H)
# error "please don't include this file directly"
#endif
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index e1317b7c45251..861594d9662df 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -26,6 +26,7 @@ struct thread_info {
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
u64 ttbr0; /* saved TTBR0_EL1 */
#endif
+ int preempt_lazy_count; /* 0 => preemptable, <0 => bug */
union {
u64 preempt_count; /* 0 => preemptible, <0 => bug */
struct {
@@ -68,6 +69,7 @@ int arch_dup_task_struct(struct task_struct *dst,
#define TIF_UPROBE 4 /* uprobe breakpoint or singlestep */
#define TIF_MTE_ASYNC_FAULT 5 /* MTE Asynchronous Tag Check Fault */
#define TIF_NOTIFY_SIGNAL 6 /* signal notifications exist */
+#define TIF_NEED_RESCHED_LAZY 7
#define TIF_SYSCALL_TRACE 8 /* syscall trace active */
#define TIF_SYSCALL_AUDIT 9 /* syscall auditing */
#define TIF_SYSCALL_TRACEPOINT 10 /* syscall tracepoint for ftrace */
@@ -98,8 +100,10 @@ int arch_dup_task_struct(struct task_struct *dst,
#define _TIF_SVE (1 << TIF_SVE)
#define _TIF_MTE_ASYNC_FAULT (1 << TIF_MTE_ASYNC_FAULT)
#define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL)
+#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY)
-#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
+#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | \
+ _TIF_SIGPENDING | \
_TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \
_TIF_UPROBE | _TIF_MTE_ASYNC_FAULT | \
_TIF_NOTIFY_SIGNAL)
@@ -108,6 +112,8 @@ int arch_dup_task_struct(struct task_struct *dst,
_TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \
_TIF_SYSCALL_EMU)
+#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)
+
#ifdef CONFIG_SHADOW_CALL_STACK
#define INIT_SCS \
.scs_base = init_shadow_call_stack, \
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 6d0c3afd36b8b..9b11f996b8ea0 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -32,6 +32,7 @@ int main(void)
DEFINE(TSK_TI_CPU, offsetof(struct task_struct, thread_info.cpu));
DEFINE(TSK_TI_FLAGS, offsetof(struct task_struct, thread_info.flags));
DEFINE(TSK_TI_PREEMPT, offsetof(struct task_struct, thread_info.preempt_count));
+ DEFINE(TSK_TI_PREEMPT_LAZY, offsetof(struct task_struct, thread_info.preempt_lazy_count));
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
DEFINE(TSK_TI_TTBR0, offsetof(struct task_struct, thread_info.ttbr0));
#endif
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index fa244c426f610..38ebf33820025 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -201,10 +201,19 @@ static void __get_cpu_fpsimd_context(void)
*
* The double-underscore version must only be called if you know the task
* can't be preempted.
+ *
+ * On RT kernels local_bh_disable() is not sufficient because it only
+ * serializes soft interrupt related sections via a local lock, but stays
+ * preemptible. Disabling preemption is the right choice here as bottom
+ * half processing is always in thread context on RT kernels so it
+ * implicitly prevents bottom half processing as well.
*/
static void get_cpu_fpsimd_context(void)
{
- local_bh_disable();
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ local_bh_disable();
+ else
+ preempt_disable();
__get_cpu_fpsimd_context();
}
@@ -225,7 +234,10 @@ static void __put_cpu_fpsimd_context(void)
static void put_cpu_fpsimd_context(void)
{
__put_cpu_fpsimd_context();
- local_bh_enable();
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ local_bh_enable();
+ else
+ preempt_enable();
}
static bool have_cpu_fpsimd_context(void)
@@ -1125,6 +1137,8 @@ static void fpsimd_flush_thread_vl(enum vec_type type)
void fpsimd_flush_thread(void)
{
+ void *sve_state = NULL;
+
if (!system_supports_fpsimd())
return;
@@ -1136,11 +1150,16 @@ void fpsimd_flush_thread(void)
if (system_supports_sve()) {
clear_thread_flag(TIF_SVE);
- sve_free(current);
+
+ /* Defer kfree() while in atomic context */
+ sve_state = current->thread.sve_state;
+ current->thread.sve_state = NULL;
+
fpsimd_flush_thread_vl(ARM64_VEC_SVE);
}
put_cpu_fpsimd_context();
+ kfree(sve_state);
}
/*
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index 8f6372b44b658..ab23598fdeb06 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -920,7 +920,7 @@ static void do_signal(struct pt_regs *regs)
void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags)
{
do {
- if (thread_flags & _TIF_NEED_RESCHED) {
+ if (thread_flags & _TIF_NEED_RESCHED_MASK) {
/* Unmask Debug and SError for the next task */
local_daif_restore(DAIF_PROCCTX_NOIRQ);
@@ -928,6 +928,14 @@ void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags)
} else {
local_daif_restore(DAIF_PROCCTX);
+#ifdef ARCH_RT_DELAYS_SIGNAL_SEND
+ if (unlikely(current->forced_info.si_signo)) {
+ struct task_struct *t = current;
+ force_sig_info(&t->forced_info);
+ t->forced_info.si_signo = 0;
+ }
+#endif
+
if (thread_flags & _TIF_UPROBE)
uprobe_notify_resume(regs);
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 2f03cbfefe676..88793cc5b9302 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -813,7 +813,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
* involves poking the GIC, which must be done in a
* non-preemptible context.
*/
- preempt_disable();
+ migrate_disable();
kvm_pmu_flush_hwstate(vcpu);
@@ -837,7 +837,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
kvm_timer_sync_user(vcpu);
kvm_vgic_sync_hwstate(vcpu);
local_irq_enable();
- preempt_enable();
+ migrate_enable();
continue;
}
@@ -909,7 +909,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
/* Exit types that need handling before we can be preempted */
handle_exit_early(vcpu, ret);
- preempt_enable();
+ migrate_enable();
/*
* The ARMv8 architecture doesn't give the hypervisor
diff --git a/arch/csky/include/asm/spinlock_types.h b/arch/csky/include/asm/spinlock_types.h
index 8ff0f6ff3a006..db87a12c3827d 100644
--- a/arch/csky/include/asm/spinlock_types.h
+++ b/arch/csky/include/asm/spinlock_types.h
@@ -3,7 +3,7 @@
#ifndef __ASM_CSKY_SPINLOCK_TYPES_H
#define __ASM_CSKY_SPINLOCK_TYPES_H
-#ifndef __LINUX_SPINLOCK_TYPES_H
+#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
# error "please don't include this file directly"
#endif
diff --git a/arch/hexagon/include/asm/spinlock_types.h b/arch/hexagon/include/asm/spinlock_types.h
index 19d233497ba52..d5f66495b670f 100644
--- a/arch/hexagon/include/asm/spinlock_types.h
+++ b/arch/hexagon/include/asm/spinlock_types.h
@@ -8,7 +8,7 @@
#ifndef _ASM_SPINLOCK_TYPES_H
#define _ASM_SPINLOCK_TYPES_H
-#ifndef __LINUX_SPINLOCK_TYPES_H
+#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
# error "please don't include this file directly"
#endif
diff --git a/arch/ia64/include/asm/spinlock_types.h b/arch/ia64/include/asm/spinlock_types.h
index 6e345fefcdcab..14b8a161c1652 100644
--- a/arch/ia64/include/asm/spinlock_types.h
+++ b/arch/ia64/include/asm/spinlock_types.h
@@ -2,7 +2,7 @@
#ifndef _ASM_IA64_SPINLOCK_TYPES_H
#define _ASM_IA64_SPINLOCK_TYPES_H
-#ifndef __LINUX_SPINLOCK_TYPES_H
+#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
# error "please don't include this file directly"
#endif
diff --git a/arch/ia64/include/asm/thread_info.h b/arch/ia64/include/asm/thread_info.h
index 51d20cb377062..1684716f08201 100644
--- a/arch/ia64/include/asm/thread_info.h
+++ b/arch/ia64/include/asm/thread_info.h
@@ -55,15 +55,15 @@ struct thread_info {
#ifndef ASM_OFFSETS_C
/* how to get the thread information struct from C */
#define current_thread_info() ((struct thread_info *) ((char *) current + IA64_TASK_SIZE))
-#define alloc_thread_stack_node(tsk, node) \
+#define arch_alloc_thread_stack_node(tsk, node) \
((unsigned long *) ((char *) (tsk) + IA64_TASK_SIZE))
#define task_thread_info(tsk) ((struct thread_info *) ((char *) (tsk) + IA64_TASK_SIZE))
#else
#define current_thread_info() ((struct thread_info *) 0)
-#define alloc_thread_stack_node(tsk, node) ((unsigned long *) 0)
+#define arch_alloc_thread_stack_node(tsk, node) ((unsigned long *) 0)
#define task_thread_info(tsk) ((struct thread_info *) 0)
#endif
-#define free_thread_stack(tsk) /* nothing */
+#define arch_free_thread_stack(tsk) /* nothing */
#define task_stack_page(tsk) ((void *)(tsk))
#define __HAVE_THREAD_FUNCTIONS
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index dea74d7717c0d..6255e4d375395 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -153,6 +153,7 @@ config PPC
select ARCH_STACKWALK
select ARCH_SUPPORTS_ATOMIC_RMW
select ARCH_SUPPORTS_DEBUG_PAGEALLOC if PPC_BOOK3S || PPC_8xx || 40x
+ select ARCH_SUPPORTS_RT if HAVE_POSIX_CPU_TIMERS_TASK_WORK
select ARCH_USE_BUILTIN_BSWAP
select ARCH_USE_CMPXCHG_LOCKREF if PPC64
select ARCH_USE_MEMTEST
@@ -221,6 +222,7 @@ config PPC
select HAVE_IOREMAP_PROT
select HAVE_IRQ_EXIT_ON_IRQ_STACK
select HAVE_IRQ_TIME_ACCOUNTING
+ select HAVE_POSIX_CPU_TIMERS_TASK_WORK if !KVM
select HAVE_KERNEL_GZIP
select HAVE_KERNEL_LZMA if DEFAULT_UIMAGE
select HAVE_KERNEL_LZO if DEFAULT_UIMAGE
@@ -237,6 +239,7 @@ config PPC
select HAVE_PERF_EVENTS_NMI if PPC64
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
+ select HAVE_PREEMPT_LAZY
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_RELIABLE_STACKTRACE
select HAVE_RSEQ
diff --git a/arch/powerpc/include/asm/simple_spinlock_types.h b/arch/powerpc/include/asm/simple_spinlock_types.h
index 0f3cdd8faa959..08243338069d2 100644
--- a/arch/powerpc/include/asm/simple_spinlock_types.h
+++ b/arch/powerpc/include/asm/simple_spinlock_types.h
@@ -2,7 +2,7 @@
#ifndef _ASM_POWERPC_SIMPLE_SPINLOCK_TYPES_H
#define _ASM_POWERPC_SIMPLE_SPINLOCK_TYPES_H
-#ifndef __LINUX_SPINLOCK_TYPES_H
+#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
# error "please don't include this file directly"
#endif
diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
index 007332a4a7325..3d3c9ba50f442 100644
--- a/arch/powerpc/include/asm/smp.h
+++ b/arch/powerpc/include/asm/smp.h
@@ -62,6 +62,7 @@ struct smp_ops_t {
extern int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us);
extern int smp_send_safe_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us);
+extern void smp_send_debugger_break_cpu(unsigned int cpu);
extern void smp_send_debugger_break(void);
extern void start_secondary_resume(void);
extern void smp_generic_give_timebase(void);
diff --git a/arch/powerpc/include/asm/spinlock_types.h b/arch/powerpc/include/asm/spinlock_types.h
index c5d742f18021d..d5f8a74ed2e8c 100644
--- a/arch/powerpc/include/asm/spinlock_types.h
+++ b/arch/powerpc/include/asm/spinlock_types.h
@@ -2,7 +2,7 @@
#ifndef _ASM_POWERPC_SPINLOCK_TYPES_H
#define _ASM_POWERPC_SPINLOCK_TYPES_H
-#ifndef __LINUX_SPINLOCK_TYPES_H
+#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
# error "please don't include this file directly"
#endif
diff --git a/arch/powerpc/include/asm/stackprotector.h b/arch/powerpc/include/asm/stackprotector.h
index 1c8460e235838..b1653c160bab9 100644
--- a/arch/powerpc/include/asm/stackprotector.h
+++ b/arch/powerpc/include/asm/stackprotector.h
@@ -24,7 +24,11 @@ static __always_inline void boot_init_stack_canary(void)
unsigned long canary;
/* Try to get a semi random initial value. */
+#ifdef CONFIG_PREEMPT_RT
+ canary = (unsigned long)&canary;
+#else
canary = get_random_canary();
+#endif
canary ^= mftb();
canary ^= LINUX_VERSION_CODE;
canary &= CANARY_MASK;
diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h
index 5725029aaa295..829315ee9c567 100644
--- a/arch/powerpc/include/asm/thread_info.h
+++ b/arch/powerpc/include/asm/thread_info.h
@@ -47,6 +47,8 @@
struct thread_info {
int preempt_count; /* 0 => preemptable,
<0 => BUG */
+ int preempt_lazy_count; /* 0 => preemptable,
+ <0 => BUG */
#ifdef CONFIG_SMP
unsigned int cpu;
#endif
@@ -71,6 +73,7 @@ struct thread_info {
#define INIT_THREAD_INFO(tsk) \
{ \
.preempt_count = INIT_PREEMPT_COUNT, \
+ .preempt_lazy_count = 0, \
.flags = 0, \
}
@@ -96,6 +99,7 @@ void arch_setup_new_exec(void);
#define TIF_PATCH_PENDING 6 /* pending live patching update */
#define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
#define TIF_SINGLESTEP 8 /* singlestepping active */
+#define TIF_NEED_RESCHED_LAZY 9 /* lazy rescheduling necessary */
#define TIF_SECCOMP 10 /* secure computing */
#define TIF_RESTOREALL 11 /* Restore all regs (implies NOERROR) */
#define TIF_NOERROR 12 /* Force successful syscall return */
@@ -111,6 +115,7 @@ void arch_setup_new_exec(void);
#define TIF_POLLING_NRFLAG 19 /* true if poll_idle() is polling TIF_NEED_RESCHED */
#define TIF_32BIT 20 /* 32 bit binary */
+
/* as above, but as bit values */
#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
#define _TIF_SIGPENDING (1<<TIF_SIGPENDING)
@@ -122,6 +127,7 @@ void arch_setup_new_exec(void);
#define _TIF_PATCH_PENDING (1<<TIF_PATCH_PENDING)
#define _TIF_SYSCALL_AUDIT (1<<TIF_SYSCALL_AUDIT)
#define _TIF_SINGLESTEP (1<<TIF_SINGLESTEP)
+#define _TIF_NEED_RESCHED_LAZY (1<<TIF_NEED_RESCHED_LAZY)
#define _TIF_SECCOMP (1<<TIF_SECCOMP)
#define _TIF_RESTOREALL (1<<TIF_RESTOREALL)
#define _TIF_NOERROR (1<<TIF_NOERROR)
@@ -135,10 +141,12 @@ void arch_setup_new_exec(void);
_TIF_SYSCALL_EMU)
#define _TIF_USER_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \
+ _TIF_NEED_RESCHED_LAZY | \
_TIF_NOTIFY_RESUME | _TIF_UPROBE | \
_TIF_RESTORE_TM | _TIF_PATCH_PENDING | \
_TIF_NOTIFY_SIGNAL)
#define _TIF_PERSYSCALL_MASK (_TIF_RESTOREALL|_TIF_NOERROR)
+#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)
/* Bits in local_flags */
/* Don't move TLF_NAPPING without adjusting the code in entry_32.S */
diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
index 835b626cd4760..3647020770a6d 100644
--- a/arch/powerpc/kernel/interrupt.c
+++ b/arch/powerpc/kernel/interrupt.c
@@ -346,7 +346,7 @@ interrupt_exit_user_prepare_main(unsigned long ret, struct pt_regs *regs)
ti_flags = READ_ONCE(current_thread_info()->flags);
while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) {
local_irq_enable();
- if (ti_flags & _TIF_NEED_RESCHED) {
+ if (ti_flags & _TIF_NEED_RESCHED_MASK) {
schedule();
} else {
/*
@@ -552,11 +552,15 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs)
/* Returning to a kernel context with local irqs enabled. */
WARN_ON_ONCE(!(regs->msr & MSR_EE));
again:
- if (IS_ENABLED(CONFIG_PREEMPT)) {
+ if (IS_ENABLED(CONFIG_PREEMPTION)) {
/* Return to preemptible kernel context */
if (unlikely(current_thread_info()->flags & _TIF_NEED_RESCHED)) {
if (preempt_count() == 0)
preempt_schedule_irq();
+ } else if (unlikely(current_thread_info()->flags & _TIF_NEED_RESCHED_LAZY)) {
+ if ((preempt_count() == 0) &&
+ (current_thread_info()->preempt_lazy_count == 0))
+ preempt_schedule_irq();
}
}
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index c4f1d6b7d9923..02e17a57da83f 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -690,6 +690,7 @@ static inline void check_stack_overflow(void)
}
}
+#ifndef CONFIG_PREEMPT_RT
static __always_inline void call_do_softirq(const void *sp)
{
/* Temporarily switch r1 to sp, call __do_softirq() then restore r1. */
@@ -708,6 +709,7 @@ static __always_inline void call_do_softirq(const void *sp)
"r11", "r12"
);
}
+#endif
static __always_inline void call_do_irq(struct pt_regs *regs, void *sp)
{
@@ -820,10 +822,12 @@ void *mcheckirq_ctx[NR_CPUS] __read_mostly;
void *softirq_ctx[NR_CPUS] __read_mostly;
void *hardirq_ctx[NR_CPUS] __read_mostly;
+#ifndef CONFIG_PREEMPT_RT
void do_softirq_own_stack(void)
{
call_do_softirq(softirq_ctx[smp_processor_id()]);
}
+#endif
irq_hw_number_t virq_to_hw(unsigned int virq)
{
diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c
index bdee7262c080a..d57d374978627 100644
--- a/arch/powerpc/kernel/kgdb.c
+++ b/arch/powerpc/kernel/kgdb.c
@@ -120,11 +120,19 @@ int kgdb_skipexception(int exception, struct pt_regs *regs)
static int kgdb_debugger_ipi(struct pt_regs *regs)
{
- kgdb_nmicallback(raw_smp_processor_id(), regs);
+ int cpu = raw_smp_processor_id();
+
+ if (!kgdb_roundup_delay(cpu))
+ kgdb_nmicallback(cpu, regs);
return 0;
}
#ifdef CONFIG_SMP
+void kgdb_roundup_cpu(unsigned int cpu)
+{
+ smp_send_debugger_break_cpu(cpu);
+}
+
void kgdb_roundup_cpus(void)
{
smp_send_debugger_break();
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index c23ee842c4c33..4ebf21908237c 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -589,6 +589,11 @@ static void debugger_ipi_callback(struct pt_regs *regs)
debugger_ipi(regs);
}
+void smp_send_debugger_break_cpu(unsigned int cpu)
+{
+ smp_send_nmi_ipi(cpu, debugger_ipi_callback, 1000000);
+}
+
void smp_send_debugger_break(void)
{
smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, debugger_ipi_callback, 1000000);
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 11741703d26e0..7e4e1f489f56a 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -260,12 +260,17 @@ static char *get_mmu_str(void)
static int __die(const char *str, struct pt_regs *regs, long err)
{
+ const char *pr = "";
+
printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter);
+ if (IS_ENABLED(CONFIG_PREEMPTION))
+ pr = IS_ENABLED(CONFIG_PREEMPT_RT) ? " PREEMPT_RT" : " PREEMPT";
+
printk("%s PAGE_SIZE=%luK%s%s%s%s%s%s %s\n",
IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN) ? "LE" : "BE",
PAGE_SIZE / 1024, get_mmu_str(),
- IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : "",
+ pr,
IS_ENABLED(CONFIG_SMP) ? " SMP" : "",
IS_ENABLED(CONFIG_SMP) ? (" NR_CPUS=" __stringify(NR_CPUS)) : "",
debug_pagealloc_enabled() ? " DEBUG_PAGEALLOC" : "",
diff --git a/arch/powerpc/kexec/crash.c b/arch/powerpc/kexec/crash.c
index 22ceeeb705abd..d5359701f787f 100644
--- a/arch/powerpc/kexec/crash.c
+++ b/arch/powerpc/kexec/crash.c
@@ -312,9 +312,6 @@ void default_machine_crash_shutdown(struct pt_regs *regs)
unsigned int i;
int (*old_handler)(struct pt_regs *regs);
- /* Avoid hardlocking with irresponsive CPU holding logbuf_lock */
- printk_deferred_enter();
-
/*
* This function is only called after the system
* has panicked or is otherwise in a critical state.
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index ff581d70f20cf..e5c84d55bdfbc 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -178,6 +178,7 @@ config KVM_E500MC
config KVM_MPIC
bool "KVM in-kernel MPIC emulation"
depends on KVM && E500
+ depends on !PREEMPT_RT
select HAVE_KVM_IRQCHIP
select HAVE_KVM_IRQFD
select HAVE_KVM_IRQ_ROUTING
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index 49b401536d297..c79686747c7cc 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -24,6 +24,7 @@
#include <linux/of.h>
#include <linux/iommu.h>
#include <linux/rculist.h>
+#include <linux/local_lock.h>
#include <asm/io.h>
#include <asm/prom.h>
#include <asm/rtas.h>
@@ -195,7 +196,13 @@ static int tce_build_pSeriesLP(unsigned long liobn, long tcenum, long tceshift,
return ret;
}
-static DEFINE_PER_CPU(__be64 *, tce_page);
+struct tce_page {
+ __be64 * page;
+ local_lock_t lock;
+};
+static DEFINE_PER_CPU(struct tce_page, tce_page) = {
+ .lock = INIT_LOCAL_LOCK(lock),
+};
static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
long npages, unsigned long uaddr,
@@ -218,9 +225,10 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
direction, attrs);
}
- local_irq_save(flags); /* to protect tcep and the page behind it */
+ /* to protect tcep and the page behind it */
+ local_lock_irqsave(&tce_page.lock, flags);
- tcep = __this_cpu_read(tce_page);
+ tcep = __this_cpu_read(tce_page.page);
/* This is safe to do since interrupts are off when we're called
* from iommu_alloc{,_sg}()
@@ -229,12 +237,12 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
tcep = (__be64 *)__get_free_page(GFP_ATOMIC);
/* If allocation fails, fall back to the loop implementation */
if (!tcep) {
- local_irq_restore(flags);
+ local_unlock_irqrestore(&tce_page.lock, flags);
return tce_build_pSeriesLP(tbl->it_index, tcenum,
tceshift,
npages, uaddr, direction, attrs);
}
- __this_cpu_write(tce_page, tcep);
+ __this_cpu_write(tce_page.page, tcep);
}
rpn = __pa(uaddr) >> tceshift;
@@ -264,7 +272,7 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
tcenum += limit;
} while (npages > 0 && !rc);
- local_irq_restore(flags);
+ local_unlock_irqrestore(&tce_page.lock, flags);
if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) {
ret = (int)rc;
@@ -440,16 +448,17 @@ static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn,
DMA_BIDIRECTIONAL, 0);
}
- local_irq_disable(); /* to protect tcep and the page behind it */
- tcep = __this_cpu_read(tce_page);
+ /* to protect tcep and the page behind it */
+ local_lock_irq(&tce_page.lock);
+ tcep = __this_cpu_read(tce_page.page);
if (!tcep) {
tcep = (__be64 *)__get_free_page(GFP_ATOMIC);
if (!tcep) {
- local_irq_enable();
+ local_unlock_irq(&tce_page.lock);
return -ENOMEM;
}
- __this_cpu_write(tce_page, tcep);
+ __this_cpu_write(tce_page.page, tcep);
}
proto_tce = TCE_PCI_READ | TCE_PCI_WRITE;
@@ -492,7 +501,7 @@ static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn,
/* error cleanup: caller will clear whole range */
- local_irq_enable();
+ local_unlock_irq(&tce_page.lock);
return rc;
}
diff --git a/arch/riscv/include/asm/spinlock_types.h b/arch/riscv/include/asm/spinlock_types.h
index f398e7638dd63..5a35a49505da2 100644
--- a/arch/riscv/include/asm/spinlock_types.h
+++ b/arch/riscv/include/asm/spinlock_types.h
@@ -6,7 +6,7 @@
#ifndef _ASM_RISCV_SPINLOCK_TYPES_H
#define _ASM_RISCV_SPINLOCK_TYPES_H
-#ifndef __LINUX_SPINLOCK_TYPES_H
+#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
# error "please don't include this file directly"
#endif
diff --git a/arch/s390/include/asm/spinlock_types.h b/arch/s390/include/asm/spinlock_types.h
index a2bbfd7df85fa..b69695e399574 100644
--- a/arch/s390/include/asm/spinlock_types.h
+++ b/arch/s390/include/asm/spinlock_types.h
@@ -2,7 +2,7 @@
#ifndef __ASM_SPINLOCK_TYPES_H
#define __ASM_SPINLOCK_TYPES_H
-#ifndef __LINUX_SPINLOCK_TYPES_H
+#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
# error "please don't include this file directly"
#endif
diff --git a/arch/sh/include/asm/spinlock_types.h b/arch/sh/include/asm/spinlock_types.h
index e82369f286a20..907bda4b1619a 100644
--- a/arch/sh/include/asm/spinlock_types.h
+++ b/arch/sh/include/asm/spinlock_types.h
@@ -2,7 +2,7 @@
#ifndef __ASM_SH_SPINLOCK_TYPES_H
#define __ASM_SH_SPINLOCK_TYPES_H
-#ifndef __LINUX_SPINLOCK_TYPES_H
+#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
# error "please don't include this file directly"
#endif
diff --git a/arch/sh/kernel/irq.c b/arch/sh/kernel/irq.c
index ef0f0827cf575..2d3eca8fee011 100644
--- a/arch/sh/kernel/irq.c
+++ b/arch/sh/kernel/irq.c
@@ -149,6 +149,7 @@ void irq_ctx_exit(int cpu)
hardirq_ctx[cpu] = NULL;
}
+#ifndef CONFIG_PREEMPT_RT
void do_softirq_own_stack(void)
{
struct thread_info *curctx;
@@ -176,6 +177,7 @@ void do_softirq_own_stack(void)
"r5", "r6", "r7", "r8", "r9", "r15", "t", "pr"
);
}
+#endif
#else
static inline void handle_one_irq(unsigned int irq)
{
diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c
index c8848bb681a11..41fa1be980a33 100644
--- a/arch/sparc/kernel/irq_64.c
+++ b/arch/sparc/kernel/irq_64.c
@@ -855,6 +855,7 @@ void __irq_entry handler_irq(int pil, struct pt_regs *regs)
set_irq_regs(old_regs);
}
+#ifndef CONFIG_PREEMPT_RT
void do_softirq_own_stack(void)
{
void *orig_sp, *sp = softirq_stack[smp_processor_id()];
@@ -869,6 +870,7 @@ void do_softirq_own_stack(void)
__asm__ __volatile__("mov %0, %%sp"
: : "r" (orig_sp));
}
+#endif
#ifdef CONFIG_HOTPLUG_CPU
void fixup_irqs(void)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 95dd1ee01546a..5ab3c0b3643db 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -108,6 +108,7 @@ config X86
select ARCH_SUPPORTS_KMAP_LOCAL_FORCE_MAP if NR_CPUS <= 4096
select ARCH_SUPPORTS_LTO_CLANG
select ARCH_SUPPORTS_LTO_CLANG_THIN
+ select ARCH_SUPPORTS_RT
select ARCH_USE_BUILTIN_BSWAP
select ARCH_USE_MEMTEST
select ARCH_USE_QUEUED_RWLOCKS
@@ -234,6 +235,7 @@ config X86
select HAVE_PCI
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
+ select HAVE_PREEMPT_LAZY
select MMU_GATHER_RCU_TABLE_FREE if PARAVIRT
select HAVE_POSIX_CPU_TIMERS_TASK_WORK
select HAVE_REGS_AND_STACK_ACCESS_API
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 448cd01eb3ecb..a34430b7af4a3 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -22,6 +22,7 @@
#define pgprot_decrypted(prot) __pgprot(__sme_clr(pgprot_val(prot)))
#ifndef __ASSEMBLY__
+#include <linux/spinlock.h>
#include <asm/x86_init.h>
#include <asm/pkru.h>
#include <asm/fpu/api.h>
diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h
index fe5efbcba8240..ab8cb5fc2329e 100644
--- a/arch/x86/include/asm/preempt.h
+++ b/arch/x86/include/asm/preempt.h
@@ -90,17 +90,48 @@ static __always_inline void __preempt_count_sub(int val)
* a decrement which hits zero means we have no preempt_count and should
* reschedule.
*/
-static __always_inline bool __preempt_count_dec_and_test(void)
+static __always_inline bool ____preempt_count_dec_and_test(void)
{
return GEN_UNARY_RMWcc("decl", __preempt_count, e, __percpu_arg([var]));
}
+static __always_inline bool __preempt_count_dec_and_test(void)
+{
+ if (____preempt_count_dec_and_test())
+ return true;
+#ifdef CONFIG_PREEMPT_LAZY
+ if (preempt_count())
+ return false;
+ if (current_thread_info()->preempt_lazy_count)
+ return false;
+ return test_thread_flag(TIF_NEED_RESCHED_LAZY);
+#else
+ return false;
+#endif
+}
+
/*
* Returns true when we need to resched and can (barring IRQ state).
*/
static __always_inline bool should_resched(int preempt_offset)
{
+#ifdef CONFIG_PREEMPT_LAZY
+ u32 tmp;
+ tmp = raw_cpu_read_4(__preempt_count);
+ if (tmp == preempt_offset)
+ return true;
+
+ /* preempt count == 0 ? */
+ tmp &= ~PREEMPT_NEED_RESCHED;
+ if (tmp != preempt_offset)
+ return false;
+ /* XXX PREEMPT_LOCK_OFFSET */
+ if (current_thread_info()->preempt_lazy_count)
+ return false;
+ return test_thread_flag(TIF_NEED_RESCHED_LAZY);
+#else
return unlikely(raw_cpu_read_4(__preempt_count) == preempt_offset);
+#endif
}
#ifdef CONFIG_PREEMPTION
diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h
index 2dfb5fea13aff..fc03f4f7ed84c 100644
--- a/arch/x86/include/asm/signal.h
+++ b/arch/x86/include/asm/signal.h
@@ -28,6 +28,19 @@ typedef struct {
#define SA_IA32_ABI 0x02000000u
#define SA_X32_ABI 0x01000000u
+/*
+ * Because some traps use the IST stack, we must keep preemption
+ * disabled while calling do_trap(), but do_trap() may call
+ * force_sig_info() which will grab the signal spin_locks for the
+ * task, which in PREEMPT_RT are mutexes. By defining
+ * ARCH_RT_DELAYS_SIGNAL_SEND the force_sig_info() will set
+ * TIF_NOTIFY_RESUME and set up the signal to be sent on exit of the
+ * trap.
+ */
+#if defined(CONFIG_PREEMPT_RT)
+#define ARCH_RT_DELAYS_SIGNAL_SEND
+#endif
+
#ifndef CONFIG_COMPAT
#define compat_sigset_t compat_sigset_t
typedef sigset_t compat_sigset_t;
diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h
index 24a8d6c4fb185..2fc22c27df183 100644
--- a/arch/x86/include/asm/stackprotector.h
+++ b/arch/x86/include/asm/stackprotector.h
@@ -50,7 +50,7 @@
*/
static __always_inline void boot_init_stack_canary(void)
{
- u64 canary;
+ u64 canary = 0;
u64 tsc;
#ifdef CONFIG_X86_64
@@ -61,8 +61,14 @@ static __always_inline void boot_init_stack_canary(void)
* of randomness. The TSC only matters for very early init,
* there it already has some randomness on most systems. Later
* on during the bootup the random pool has true entropy too.
+ * For preempt-rt we need to weaken the randomness a bit, as
+ * we can't call into the random generator from atomic context
+ * due to locking constraints. We just leave canary
+ * uninitialized and use the TSC based randomness on top of it.
*/
+#ifndef CONFIG_PREEMPT_RT
get_random_bytes(&canary, sizeof(canary));
+#endif
tsc = rdtsc();
canary += tsc + (tsc << 32UL);
canary &= CANARY_MASK;
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index ebec69c35e951..39005bff5b8f3 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -57,6 +57,8 @@ struct thread_info {
unsigned long flags; /* low level flags */
unsigned long syscall_work; /* SYSCALL_WORK_ flags */
u32 status; /* thread synchronous flags */
+ int preempt_lazy_count; /* 0 => lazy preemptable
+ <0 => BUG */
#ifdef CONFIG_SMP
u32 cpu; /* current CPU */
#endif
@@ -65,6 +67,7 @@ struct thread_info {
#define INIT_THREAD_INFO(tsk) \
{ \
.flags = 0, \
+ .preempt_lazy_count = 0, \
}
#else /* !__ASSEMBLY__ */
@@ -93,6 +96,7 @@ struct thread_info {
#define TIF_NOTSC 16 /* TSC is not accessible in userland */
#define TIF_NOTIFY_SIGNAL 17 /* signal notifications exist */
#define TIF_SLD 18 /* Restore split lock detection on context switch */
+#define TIF_NEED_RESCHED_LAZY 19 /* lazy rescheduling necessary */
#define TIF_MEMDIE 20 /* is terminating due to OOM killer */
#define TIF_POLLING_NRFLAG 21 /* idle is polling for TIF_NEED_RESCHED */
#define TIF_IO_BITMAP 22 /* uses I/O bitmap */
@@ -117,6 +121,7 @@ struct thread_info {
#define _TIF_NOTSC (1 << TIF_NOTSC)
#define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL)
#define _TIF_SLD (1 << TIF_SLD)
+#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY)
#define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG)
#define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP)
#define _TIF_SPEC_FORCE_UPDATE (1 << TIF_SPEC_FORCE_UPDATE)
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 4794b716ec79e..b4f8503544978 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -75,11 +75,12 @@ void hv_remove_vmbus_handler(void)
DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_stimer0)
{
struct pt_regs *old_regs = set_irq_regs(regs);
+ u64 ip = regs ? instruction_pointer(regs) : 0;
inc_irq_stat(hyperv_stimer0_count);
if (hv_stimer0_handler)
hv_stimer0_handler();
- add_interrupt_randomness(HYPERV_STIMER0_VECTOR, 0);
+ add_interrupt_randomness(HYPERV_STIMER0_VECTOR, 0, ip);
ack_APIC_irq();
set_irq_regs(old_regs);
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index 722fd712e1cf0..82cc3a7be6bd2 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -141,7 +141,7 @@ int get_stack_info(unsigned long *stack, struct task_struct *task,
*/
if (visit_mask) {
if (*visit_mask & (1UL << info->type)) {
- printk_deferred_once(KERN_WARNING "WARNING: stack recursion on stack type %d\n", info->type);
+ pr_warn_once("WARNING: stack recursion on stack type %d\n", info->type);
goto unknown;
}
*visit_mask |= 1UL << info->type;
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 6c5defd6569a3..5f725b0ceb297 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -207,7 +207,8 @@ int get_stack_info(unsigned long *stack, struct task_struct *task,
if (visit_mask) {
if (*visit_mask & (1UL << info->type)) {
if (task == current)
- printk_deferred_once(KERN_WARNING "WARNING: stack recursion on stack type %d\n", info->type);
+ pr_warn_once("WARNING: stack recursion on stack type %d\n",
+ info->type);
goto unknown;
}
*visit_mask |= 1UL << info->type;
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
index 15aefa3f3e18e..52af9a89ad478 100644
--- a/arch/x86/kernel/i8259.c
+++ b/arch/x86/kernel/i8259.c
@@ -207,8 +207,7 @@ static void mask_and_ack_8259A(struct irq_data *data)
* lets ACK and report it. [once per IRQ]
*/
if (!(spurious_irq_mask & irqmask)) {
- printk_deferred(KERN_DEBUG
- "spurious 8259A interrupt: IRQ%d.\n", irq);
+ printk(KERN_DEBUG "spurious 8259A interrupt: IRQ%d.\n", irq);
spurious_irq_mask |= irqmask;
}
atomic_inc(&irq_err_count);
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 3a43a2dee6581..37bd37cdf2b60 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -502,9 +502,12 @@ static int kgdb_nmi_handler(unsigned int cmd, struct pt_regs *regs)
if (atomic_read(&kgdb_active) != -1) {
/* KGDB CPU roundup */
cpu = raw_smp_processor_id();
- kgdb_nmicallback(cpu, regs);
- set_bit(cpu, was_in_debug_nmi);
- touch_nmi_watchdog();
+
+ if (!kgdb_roundup_delay(cpu)) {
+ kgdb_nmicallback(cpu, regs);
+ set_bit(cpu, was_in_debug_nmi);
+ touch_nmi_watchdog();
+ }
return NMI_HANDLED;
}
diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c
index 8e1c50c86e5db..06080d9546e53 100644
--- a/arch/x86/kernel/unwind_frame.c
+++ b/arch/x86/kernel/unwind_frame.c
@@ -41,9 +41,9 @@ static void unwind_dump(struct unwind_state *state)
dumped_before = true;
- printk_deferred("unwind stack type:%d next_sp:%p mask:0x%lx graph_idx:%d\n",
- state->stack_info.type, state->stack_info.next_sp,
- state->stack_mask, state->graph_idx);
+ printk("unwind stack type:%d next_sp:%p mask:0x%lx graph_idx:%d\n",
+ state->stack_info.type, state->stack_info.next_sp,
+ state->stack_mask, state->graph_idx);
for (sp = PTR_ALIGN(state->orig_sp, sizeof(long)); sp;
sp = PTR_ALIGN(stack_info.next_sp, sizeof(long))) {
@@ -59,13 +59,11 @@ static void unwind_dump(struct unwind_state *state)
if (zero) {
if (!prev_zero)
- printk_deferred("%p: %0*x ...\n",
- sp, BITS_PER_LONG/4, 0);
+ printk("%p: %0*x ...\n", sp, BITS_PER_LONG/4, 0);
continue;
}
- printk_deferred("%p: %0*lx (%pB)\n",
- sp, BITS_PER_LONG/4, word, (void *)word);
+ printk("%p: %0*lx (%pB)\n", sp, BITS_PER_LONG/4, word, (void *)word);
}
}
}
@@ -341,13 +339,13 @@ bool unwind_next_frame(struct unwind_state *state)
goto the_end;
if (state->regs) {
- printk_deferred_once(KERN_WARNING
+ pr_warn_once(
"WARNING: kernel stack regs at %p in %s:%d has bad 'bp' value %p\n",
state->regs, state->task->comm,
state->task->pid, next_bp);
unwind_dump(state);
} else {
- printk_deferred_once(KERN_WARNING
+ pr_warn_once(
"WARNING: kernel stack frame pointer at %p in %s:%d has bad value %p\n",
state->bp, state->task->comm,
state->task->pid, next_bp);
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index 2de3c8c5eba9f..9cf5bd20e48af 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -9,7 +9,7 @@
#include <asm/orc_lookup.h>
#define orc_warn(fmt, ...) \
- printk_deferred_once(KERN_WARNING "WARNING: " fmt, ##__VA_ARGS__)
+ pr_warn_once("WARNING: " fmt, ##__VA_ARGS__)
#define orc_warn_current(args...) \
({ \
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index dc7eb5fddfd3e..37c1d06ad207a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8605,6 +8605,12 @@ int kvm_arch_init(void *opaque)
goto out;
}
+ if (IS_ENABLED(CONFIG_PREEMPT_RT) && !boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
+ pr_err("RT requires X86_FEATURE_CONSTANT_TSC\n");
+ r = -EOPNOTSUPP;
+ goto out;
+ }
+
r = -ENOMEM;
x86_emulator_cache = kvm_alloc_emulator_cache();
diff --git a/arch/xtensa/include/asm/spinlock_types.h b/arch/xtensa/include/asm/spinlock_types.h
index 64c9389254f13..797aed7df3dd8 100644
--- a/arch/xtensa/include/asm/spinlock_types.h
+++ b/arch/xtensa/include/asm/spinlock_types.h
@@ -2,7 +2,7 @@
#ifndef __ASM_SPINLOCK_TYPES_H
#define __ASM_SPINLOCK_TYPES_H
-#if !defined(__LINUX_SPINLOCK_TYPES_H) && !defined(__ASM_SPINLOCK_H)
+#if !defined(__LINUX_SPINLOCK_TYPES_RAW_H) && !defined(__ASM_SPINLOCK_H)
# error "please don't include this file directly"
#endif
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 3ab34c4f20daf..6022716ff00f2 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1856,14 +1856,14 @@ static void __blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async,
return;
if (!async && !(hctx->flags & BLK_MQ_F_BLOCKING)) {
- int cpu = get_cpu();
+ int cpu = get_cpu_light();
if (cpumask_test_cpu(cpu, hctx->cpumask)) {
__blk_mq_run_hw_queue(hctx);
- put_cpu();
+ put_cpu_light();
return;
}
- put_cpu();
+ put_cpu_light();
}
kblockd_mod_delayed_work_on(blk_mq_hctx_next_cpu(hctx), &hctx->run_work,
diff --git a/crypto/cryptd.c b/crypto/cryptd.c
index a1bea0f4baa88..5f8ca8c1f59cd 100644
--- a/crypto/cryptd.c
+++ b/crypto/cryptd.c
@@ -36,6 +36,7 @@ static struct workqueue_struct *cryptd_wq;
struct cryptd_cpu_queue {
struct crypto_queue queue;
struct work_struct work;
+ spinlock_t qlock;
};
struct cryptd_queue {
@@ -105,6 +106,7 @@ static int cryptd_init_queue(struct cryptd_queue *queue,
cpu_queue = per_cpu_ptr(queue->cpu_queue, cpu);
crypto_init_queue(&cpu_queue->queue, max_cpu_qlen);
INIT_WORK(&cpu_queue->work, cryptd_queue_worker);
+ spin_lock_init(&cpu_queue->qlock);
}
pr_info("cryptd: max_cpu_qlen set to %d\n", max_cpu_qlen);
return 0;
@@ -129,8 +131,10 @@ static int cryptd_enqueue_request(struct cryptd_queue *queue,
struct cryptd_cpu_queue *cpu_queue;
refcount_t *refcnt;
- cpu = get_cpu();
- cpu_queue = this_cpu_ptr(queue->cpu_queue);
+ cpu_queue = raw_cpu_ptr(queue->cpu_queue);
+ spin_lock_bh(&cpu_queue->qlock);
+ cpu = smp_processor_id();
+
err = crypto_enqueue_request(&cpu_queue->queue, request);
refcnt = crypto_tfm_ctx(request->tfm);
@@ -146,7 +150,7 @@ static int cryptd_enqueue_request(struct cryptd_queue *queue,
refcount_inc(refcnt);
out_put_cpu:
- put_cpu();
+ spin_unlock_bh(&cpu_queue->qlock);
return err;
}
@@ -162,16 +166,11 @@ static void cryptd_queue_worker(struct work_struct *work)
cpu_queue = container_of(work, struct cryptd_cpu_queue, work);
/*
* Only handle one request at a time to avoid hogging crypto workqueue.
- * preempt_disable/enable is used to prevent being preempted by
- * cryptd_enqueue_request(). local_bh_disable/enable is used to prevent
- * cryptd_enqueue_request() being accessed from software interrupts.
*/
- local_bh_disable();
- preempt_disable();
+ spin_lock_bh(&cpu_queue->qlock);
backlog = crypto_get_backlog(&cpu_queue->queue);
req = crypto_dequeue_request(&cpu_queue->queue);
- preempt_enable();
- local_bh_enable();
+ spin_unlock_bh(&cpu_queue->qlock);
if (!req)
return;
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 08d7953ec5f10..2b2a1d472a7c6 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -59,6 +59,40 @@ static void zram_free_page(struct zram *zram, size_t index);
static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
u32 index, int offset, struct bio *bio);
+#ifdef CONFIG_PREEMPT_RT
+static void zram_meta_init_table_locks(struct zram *zram, size_t num_pages)
+{
+ size_t index;
+
+ for (index = 0; index < num_pages; index++)
+ spin_lock_init(&zram->table[index].lock);
+}
+
+static int zram_slot_trylock(struct zram *zram, u32 index)
+{
+ int ret;
+
+ ret = spin_trylock(&zram->table[index].lock);
+ if (ret)
+ __set_bit(ZRAM_LOCK, &zram->table[index].flags);
+ return ret;
+}
+
+static void zram_slot_lock(struct zram *zram, u32 index)
+{
+ spin_lock(&zram->table[index].lock);
+ __set_bit(ZRAM_LOCK, &zram->table[index].flags);
+}
+
+static void zram_slot_unlock(struct zram *zram, u32 index)
+{
+ __clear_bit(ZRAM_LOCK, &zram->table[index].flags);
+ spin_unlock(&zram->table[index].lock);
+}
+
+#else
+
+static void zram_meta_init_table_locks(struct zram *zram, size_t num_pages) { }
static int zram_slot_trylock(struct zram *zram, u32 index)
{
@@ -74,6 +108,7 @@ static void zram_slot_unlock(struct zram *zram, u32 index)
{
bit_spin_unlock(ZRAM_LOCK, &zram->table[index].flags);
}
+#endif
static inline bool init_done(struct zram *zram)
{
@@ -1199,6 +1234,7 @@ static bool zram_meta_alloc(struct zram *zram, u64 disksize)
if (!huge_class_size)
huge_class_size = zs_huge_class_size(zram->mem_pool);
+ zram_meta_init_table_locks(zram, num_pages);
return true;
}
diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
index 80c3b43b4828f..d8f6d880f9151 100644
--- a/drivers/block/zram/zram_drv.h
+++ b/drivers/block/zram/zram_drv.h
@@ -63,6 +63,7 @@ struct zram_table_entry {
unsigned long element;
};
unsigned long flags;
+ spinlock_t lock;
#ifdef CONFIG_ZRAM_MEMORY_TRACKING
ktime_t ac_time;
#endif
diff --git a/drivers/char/random.c b/drivers/char/random.c
index 605969ed0f965..56b2d5a7e2a07 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -1242,26 +1242,25 @@ static __u32 get_reg(struct fast_pool *f, struct pt_regs *regs)
return *ptr;
}
-void add_interrupt_randomness(int irq, int irq_flags)
+void add_interrupt_randomness(int irq, int irq_flags, __u64 ip)
{
struct entropy_store *r;
struct fast_pool *fast_pool = this_cpu_ptr(&irq_randomness);
- struct pt_regs *regs = get_irq_regs();
unsigned long now = jiffies;
cycles_t cycles = random_get_entropy();
__u32 c_high, j_high;
- __u64 ip;
if (cycles == 0)
- cycles = get_reg(fast_pool, regs);
+ cycles = get_reg(fast_pool, NULL);
c_high = (sizeof(cycles) > 4) ? cycles >> 32 : 0;
j_high = (sizeof(now) > 4) ? now >> 32 : 0;
fast_pool->pool[0] ^= cycles ^ j_high ^ irq;
fast_pool->pool[1] ^= now ^ c_high;
- ip = regs ? instruction_pointer(regs) : _RET_IP_;
+ if (!ip)
+ ip = _RET_IP_;
fast_pool->pool[2] ^= ip;
fast_pool->pool[3] ^= (sizeof(ip) > 4) ? ip >> 32 :
- get_reg(fast_pool, regs);
+ get_reg(fast_pool, NULL);
fast_mix(fast_pool);
add_interrupt_bench(cycles);
@@ -1507,9 +1506,8 @@ static void _warn_unseeded_randomness(const char *func_name, void *caller,
print_once = true;
#endif
if (__ratelimit(&unseeded_warning))
- printk_deferred(KERN_NOTICE "random: %s called from %pS "
- "with crng_init=%d\n", func_name, caller,
- crng_init);
+ pr_notice("random: %s called from %pS with crng_init=%d\n",
+ func_name, caller, crng_init);
}
/*
diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c
index d3f2e5364c275..9c4a99757afd3 100644
--- a/drivers/char/tpm/tpm_tis.c
+++ b/drivers/char/tpm/tpm_tis.c
@@ -50,6 +50,31 @@ static inline struct tpm_tis_tcg_phy *to_tpm_tis_tcg_phy(struct tpm_tis_data *da
return container_of(data, struct tpm_tis_tcg_phy, priv);
}
+#ifdef CONFIG_PREEMPT_RT
+/*
+ * Flushes previous write operations to the chip so that a subsequent
+ * ioread*() won't stall a CPU.
+ */
+static inline void tpm_tis_flush(void __iomem *iobase)
+{
+ ioread8(iobase + TPM_ACCESS(0));
+}
+#else
+#define tpm_tis_flush(iobase) do { } while (0)
+#endif
+
+static inline void tpm_tis_iowrite8(u8 b, void __iomem *iobase, u32 addr)
+{
+ iowrite8(b, iobase + addr);
+ tpm_tis_flush(iobase);
+}
+
+static inline void tpm_tis_iowrite32(u32 b, void __iomem *iobase, u32 addr)
+{
+ iowrite32(b, iobase + addr);
+ tpm_tis_flush(iobase);
+}
+
static int interrupts = -1;
module_param(interrupts, int, 0444);
MODULE_PARM_DESC(interrupts, "Enable interrupts");
@@ -169,7 +194,7 @@ static int tpm_tcg_write_bytes(struct tpm_tis_data *data, u32 addr, u16 len,
struct tpm_tis_tcg_phy *phy = to_tpm_tis_tcg_phy(data);
while (len--)
- iowrite8(*value++, phy->iobase + addr);
+ tpm_tis_iowrite8(*value++, phy->iobase, addr);
return 0;
}
@@ -196,7 +221,7 @@ static int tpm_tcg_write32(struct tpm_tis_data *data, u32 addr, u32 value)
{
struct tpm_tis_tcg_phy *phy = to_tpm_tis_tcg_phy(data);
- iowrite32(value, phy->iobase + addr);
+ tpm_tis_iowrite32(value, phy->iobase, addr);
return 0;
}
diff --git a/drivers/gpu/drm/i915/display/intel_crtc.c b/drivers/gpu/drm/i915/display/intel_crtc.c
index 254e67141a776..7a39029b083f4 100644
--- a/drivers/gpu/drm/i915/display/intel_crtc.c
+++ b/drivers/gpu/drm/i915/display/intel_crtc.c
@@ -425,7 +425,8 @@ void intel_pipe_update_start(const struct intel_crtc_state *new_crtc_state)
*/
intel_psr_wait_for_idle(new_crtc_state);
- local_irq_disable();
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ local_irq_disable();
crtc->debug.min_vbl = min;
crtc->debug.max_vbl = max;
@@ -450,11 +451,13 @@ void intel_pipe_update_start(const struct intel_crtc_state *new_crtc_state)
break;
}
- local_irq_enable();
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ local_irq_enable();
timeout = schedule_timeout(timeout);
- local_irq_disable();
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ local_irq_disable();
}
finish_wait(wq, &wait);
@@ -487,7 +490,8 @@ void intel_pipe_update_start(const struct intel_crtc_state *new_crtc_state)
return;
irq_disable:
- local_irq_disable();
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ local_irq_disable();
}
#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_VBLANK_EVADE)
@@ -566,7 +570,8 @@ void intel_pipe_update_end(struct intel_crtc_state *new_crtc_state)
new_crtc_state->uapi.event = NULL;
}
- local_irq_enable();
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ local_irq_enable();
/* Send VRR Push to terminate Vblank */
intel_vrr_send_push(new_crtc_state);
diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
index 209cf265bf746..6e1b9068d944c 100644
--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
@@ -311,10 +311,9 @@ void __intel_breadcrumbs_park(struct intel_breadcrumbs *b)
/* Kick the work once more to drain the signalers, and disarm the irq */
irq_work_sync(&b->irq_work);
while (READ_ONCE(b->irq_armed) && !atomic_read(&b->active)) {
- local_irq_disable();
- signal_irq_work(&b->irq_work);
- local_irq_enable();
+ irq_work_queue(&b->irq_work);
cond_resched();
+ irq_work_sync(&b->irq_work);
}
}
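Note: __intel_breadcrumbs_park() no longer invokes the irq_work handler
by hand under local_irq_disable(); it queues the work and waits for it,
which also works on RT where irq_work may run in a different context.
The generic shape, as a sketch:

        #include <linux/irq_work.h>

        static void kick_and_wait(struct irq_work *work)
        {
                irq_work_queue(work);   /* let the handler run in its usual context */
                irq_work_sync(work);    /* block until it has executed */
        }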
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
index 246c37d72cd73..8a575629ef1b6 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -211,7 +211,8 @@ static inline void intel_context_enter(struct intel_context *ce)
static inline void intel_context_mark_active(struct intel_context *ce)
{
- lockdep_assert_held(&ce->timeline->mutex);
+ lockdep_assert(lockdep_is_held(&ce->timeline->mutex) ||
+ test_bit(CONTEXT_IS_PARKED, &ce->flags));
++ce->active_count;
}
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 9e0177dc5484e..329f470d125f2 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -118,6 +118,7 @@ struct intel_context {
#define CONTEXT_LRCA_DIRTY 9
#define CONTEXT_GUC_INIT 10
#define CONTEXT_PERMA_PIN 11
+#define CONTEXT_IS_PARKED 12
struct {
u64 timeout_us;
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index a1334b48dde7b..456f1f5d0c04e 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -80,39 +80,6 @@ static int __engine_unpark(struct intel_wakeref *wf)
return 0;
}
-#if IS_ENABLED(CONFIG_LOCKDEP)
-
-static unsigned long __timeline_mark_lock(struct intel_context *ce)
-{
- unsigned long flags;
-
- local_irq_save(flags);
- mutex_acquire(&ce->timeline->mutex.dep_map, 2, 0, _THIS_IP_);
-
- return flags;
-}
-
-static void __timeline_mark_unlock(struct intel_context *ce,
- unsigned long flags)
-{
- mutex_release(&ce->timeline->mutex.dep_map, _THIS_IP_);
- local_irq_restore(flags);
-}
-
-#else
-
-static unsigned long __timeline_mark_lock(struct intel_context *ce)
-{
- return 0;
-}
-
-static void __timeline_mark_unlock(struct intel_context *ce,
- unsigned long flags)
-{
-}
-
-#endif /* !IS_ENABLED(CONFIG_LOCKDEP) */
-
static void duration(struct dma_fence *fence, struct dma_fence_cb *cb)
{
struct i915_request *rq = to_request(fence);
@@ -159,7 +126,6 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
{
struct intel_context *ce = engine->kernel_context;
struct i915_request *rq;
- unsigned long flags;
bool result = true;
/*
@@ -214,7 +180,7 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
* engine->wakeref.count, we may see the request completion and retire
* it causing an underflow of the engine->wakeref.
*/
- flags = __timeline_mark_lock(ce);
+ set_bit(CONTEXT_IS_PARKED, &ce->flags);
GEM_BUG_ON(atomic_read(&ce->timeline->active_count) < 0);
rq = __i915_request_create(ce, GFP_NOWAIT);
@@ -246,7 +212,7 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
result = false;
out_unlock:
- __timeline_mark_unlock(ce, flags);
+ clear_bit(CONTEXT_IS_PARKED, &ce->flags);
return result;
}
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index bedb80057046a..1dbcac05f44eb 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -1284,7 +1284,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
* and context switches) submission.
*/
- spin_lock(&sched_engine->lock);
+ spin_lock_irq(&sched_engine->lock);
/*
* If the queue is higher priority than the last
@@ -1384,7 +1384,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
* Even if ELSP[1] is occupied and not worthy
* of timeslices, our queue might be.
*/
- spin_unlock(&sched_engine->lock);
+ spin_unlock_irq(&sched_engine->lock);
return;
}
}
@@ -1410,7 +1410,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
if (last && !can_merge_rq(last, rq)) {
spin_unlock(&ve->base.sched_engine->lock);
- spin_unlock(&engine->sched_engine->lock);
+ spin_unlock_irq(&engine->sched_engine->lock);
return; /* leave this for another sibling */
}
@@ -1572,7 +1572,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
*/
sched_engine->queue_priority_hint = queue_prio(sched_engine);
i915_sched_engine_reset_on_empty(sched_engine);
- spin_unlock(&sched_engine->lock);
+ spin_unlock_irq(&sched_engine->lock);
/*
* We can skip poking the HW if we ended up with exactly the same set
@@ -1598,13 +1598,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
}
}
-static void execlists_dequeue_irq(struct intel_engine_cs *engine)
-{
- local_irq_disable(); /* Suspend interrupts across request submission */
- execlists_dequeue(engine);
- local_irq_enable(); /* flush irq_work (e.g. breadcrumb enabling) */
-}
-
static void clear_ports(struct i915_request **ports, int count)
{
memset_p((void **)ports, NULL, count);
@@ -2424,7 +2417,7 @@ static void execlists_submission_tasklet(struct tasklet_struct *t)
}
if (!engine->execlists.pending[0]) {
- execlists_dequeue_irq(engine);
+ execlists_dequeue(engine);
start_timeslice(engine);
}
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 77680bca46eec..be8faaaa60226 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -916,7 +916,8 @@ static bool i915_get_crtc_scanoutpos(struct drm_crtc *_crtc,
*/
spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
- /* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */
+ if (IS_ENABLED(CONFIG_PREEMPT_RT))
+ preempt_disable();
/* Get optional system timestamp before query. */
if (stime)
@@ -980,7 +981,8 @@ static bool i915_get_crtc_scanoutpos(struct drm_crtc *_crtc,
if (etime)
*etime = ktime_get();
- /* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */
+ if (IS_ENABLED(CONFIG_PREEMPT_RT))
+ preempt_enable();
spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 820a1f38b271e..3bbe34ff3b15a 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -559,7 +559,6 @@ bool __i915_request_submit(struct i915_request *request)
RQ_TRACE(request, "\n");
- GEM_BUG_ON(!irqs_disabled());
lockdep_assert_held(&engine->sched_engine->lock);
/*
@@ -668,7 +667,6 @@ void __i915_request_unsubmit(struct i915_request *request)
*/
RQ_TRACE(request, "\n");
- GEM_BUG_ON(!irqs_disabled());
lockdep_assert_held(&engine->sched_engine->lock);
/*
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index dc359242d1aec..c5898882bb27a 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -642,7 +642,8 @@ i915_request_timeline(const struct i915_request *rq)
{
/* Valid only while the request is being constructed (or retired). */
return rcu_dereference_protected(rq->timeline,
- lockdep_is_held(&rcu_access_pointer(rq->timeline)->mutex));
+ lockdep_is_held(&rcu_access_pointer(rq->timeline)->mutex) ||
+ test_bit(CONTEXT_IS_PARKED, &rq->context->flags));
}
static inline struct i915_gem_context *
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index 8104981a66044..89a4089bc4baf 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -2,6 +2,10 @@
#if !defined(_I915_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ)
#define _I915_TRACE_H_
+#ifdef CONFIG_PREEMPT_RT
+#define NOTRACE
+#endif
+
#include <linux/stringify.h>
#include <linux/types.h>
#include <linux/tracepoint.h>
@@ -819,7 +823,7 @@ DEFINE_EVENT(i915_request, i915_request_add,
TP_ARGS(rq)
);
-#if defined(CONFIG_DRM_I915_LOW_LEVEL_TRACEPOINTS)
+#if defined(CONFIG_DRM_I915_LOW_LEVEL_TRACEPOINTS) && !defined(NOTRACE)
DEFINE_EVENT(i915_request, i915_request_guc_submit,
TP_PROTO(struct i915_request *rq),
TP_ARGS(rq)
diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h
index 7a5925072466a..b7b56fb1e2fc7 100644
--- a/drivers/gpu/drm/i915/i915_utils.h
+++ b/drivers/gpu/drm/i915/i915_utils.h
@@ -344,7 +344,7 @@ wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int to_wait_ms)
#define wait_for(COND, MS) _wait_for((COND), (MS) * 1000, 10, 1000)
/* If CONFIG_PREEMPT_COUNT is disabled, in_atomic() always reports false. */
-#if defined(CONFIG_DRM_I915_DEBUG) && defined(CONFIG_PREEMPT_COUNT)
+#if defined(CONFIG_DRM_I915_DEBUG) && defined(CONFIG_PREEMPT_COUNT) && !defined(CONFIG_PREEMPT_RT)
# define _WAIT_FOR_ATOMIC_CHECK(ATOMIC) WARN_ON_ONCE((ATOMIC) && !in_atomic())
#else
# define _WAIT_FOR_ATOMIC_CHECK(ATOMIC) do { } while (0)
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index 3a1f007b678a0..7be17d52eaaea 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -19,6 +19,7 @@
#include <linux/atomic.h>
#include <linux/hyperv.h>
#include <linux/interrupt.h>
+#include <linux/irq.h>
#include "hv_trace.h"
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 392c1ac4f8193..c5e9725fb5ff2 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -22,6 +22,7 @@
#include <linux/clockchips.h>
#include <linux/cpu.h>
#include <linux/sched/task_stack.h>
+#include <linux/irq.h>
#include <linux/delay.h>
#include <linux/notifier.h>
@@ -1337,6 +1338,8 @@ static void vmbus_isr(void)
void *page_addr = hv_cpu->synic_event_page;
struct hv_message *msg;
union hv_synic_event_flags *event;
+ struct pt_regs *regs = get_irq_regs();
+ u64 ip = regs ? instruction_pointer(regs) : 0;
bool handled = false;
if (unlikely(page_addr == NULL))
@@ -1381,7 +1384,7 @@ static void vmbus_isr(void)
tasklet_schedule(&hv_cpu->msg_dpc);
}
- add_interrupt_randomness(vmbus_interrupt, 0);
+ add_interrupt_randomness(vmbus_interrupt, 0, ip);
}
static irqreturn_t vmbus_percpu_isr(int irq, void *dev_id)
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 9c1a5877cf9f6..e748c0e333495 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2217,8 +2217,9 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
struct raid5_percpu *percpu;
unsigned long cpu;
- cpu = get_cpu();
+ cpu = get_cpu_light();
percpu = per_cpu_ptr(conf->percpu, cpu);
+ spin_lock(&percpu->lock);
if (test_bit(STRIPE_OP_BIOFILL, &ops_request)) {
ops_run_biofill(sh);
overlap_clear++;
@@ -2277,7 +2278,8 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
if (test_and_clear_bit(R5_Overlap, &dev->flags))
wake_up(&sh->raid_conf->wait_for_overlap);
}
- put_cpu();
+ spin_unlock(&percpu->lock);
+ put_cpu_light();
}
static void free_stripe(struct kmem_cache *sc, struct stripe_head *sh)
@@ -7102,6 +7104,7 @@ static int raid456_cpu_up_prepare(unsigned int cpu, struct hlist_node *node)
__func__, cpu);
return -ENOMEM;
}
+ spin_lock_init(&per_cpu_ptr(conf->percpu, cpu)->lock);
return 0;
}
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index 5c05acf20e1f2..665fe138ab4f7 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -635,6 +635,7 @@ struct r5conf {
int recovery_disabled;
/* per cpu variables */
struct raid5_percpu {
+ spinlock_t lock; /* protects the per-CPU data on PREEMPT_RT */
struct page *spare_page; /* Used when checking P/Q in raid6 */
void *scribble; /* space for constructing buffer
* lists and performing address
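Note: raid_run_ops() used to rely on get_cpu() disabling preemption to
keep the per-CPU scratch buffers consistent; on RT that guarantee is
gone, hence the new spinlock_t in struct raid5_percpu. The resulting
pattern, sketched with an illustrative structure (get_cpu_light() and
put_cpu_light() are RT-patch APIs):

        struct my_percpu {
                spinlock_t lock;
                void *scratch;
        };

        static void use_scratch(struct my_percpu __percpu *pcp)
        {
                struct my_percpu *p;
                int cpu;

                cpu = get_cpu_light();          /* on RT: disables only migration */
                p = per_cpu_ptr(pcp, cpu);
                spin_lock(&p->lock);            /* serializes same-CPU tasks */
                /* ... work on p->scratch ... */
                spin_unlock(&p->lock);
                put_cpu_light();
        }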
diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c
index 6415f88738ada..556284ea978b0 100644
--- a/drivers/scsi/fcoe/fcoe.c
+++ b/drivers/scsi/fcoe/fcoe.c
@@ -1450,11 +1450,11 @@ static int fcoe_rcv(struct sk_buff *skb, struct net_device *netdev,
static int fcoe_alloc_paged_crc_eof(struct sk_buff *skb, int tlen)
{
struct fcoe_percpu_s *fps;
- int rc;
+ int rc, cpu = get_cpu_light();
- fps = &get_cpu_var(fcoe_percpu);
+ fps = &per_cpu(fcoe_percpu, cpu);
rc = fcoe_get_paged_crc_eof(skb, tlen, fps);
- put_cpu_var(fcoe_percpu);
+ put_cpu_light();
return rc;
}
@@ -1639,11 +1639,11 @@ static inline int fcoe_filter_frames(struct fc_lport *lport,
return 0;
}
- stats = per_cpu_ptr(lport->stats, get_cpu());
+ stats = per_cpu_ptr(lport->stats, get_cpu_light());
stats->InvalidCRCCount++;
if (stats->InvalidCRCCount < 5)
printk(KERN_WARNING "fcoe: dropping frame with CRC error\n");
- put_cpu();
+ put_cpu_light();
return -EINVAL;
}
@@ -1684,7 +1684,7 @@ static void fcoe_recv_frame(struct sk_buff *skb)
*/
hp = (struct fcoe_hdr *) skb_network_header(skb);
- stats = per_cpu_ptr(lport->stats, get_cpu());
+ stats = per_cpu_ptr(lport->stats, get_cpu_light());
if (unlikely(FC_FCOE_DECAPS_VER(hp) != FC_FCOE_VER)) {
if (stats->ErrorFrames < 5)
printk(KERN_WARNING "fcoe: FCoE version "
@@ -1716,13 +1716,13 @@ static void fcoe_recv_frame(struct sk_buff *skb)
goto drop;
if (!fcoe_filter_frames(lport, fp)) {
- put_cpu();
+ put_cpu_light();
fc_exch_recv(lport, fp);
return;
}
drop:
stats->ErrorFrames++;
- put_cpu();
+ put_cpu_light();
kfree_skb(skb);
}
diff --git a/drivers/scsi/fcoe/fcoe_ctlr.c b/drivers/scsi/fcoe/fcoe_ctlr.c
index 1756a0ac6f083..3a2cbf35ea3d6 100644
--- a/drivers/scsi/fcoe/fcoe_ctlr.c
+++ b/drivers/scsi/fcoe/fcoe_ctlr.c
@@ -828,7 +828,7 @@ static unsigned long fcoe_ctlr_age_fcfs(struct fcoe_ctlr *fip)
INIT_LIST_HEAD(&del_list);
- stats = per_cpu_ptr(fip->lp->stats, get_cpu());
+ stats = per_cpu_ptr(fip->lp->stats, get_cpu_light());
list_for_each_entry_safe(fcf, next, &fip->fcfs, list) {
deadline = fcf->time + fcf->fka_period + fcf->fka_period / 2;
@@ -864,7 +864,7 @@ static unsigned long fcoe_ctlr_age_fcfs(struct fcoe_ctlr *fip)
sel_time = fcf->time;
}
}
- put_cpu();
+ put_cpu_light();
list_for_each_entry_safe(fcf, next, &del_list, list) {
/* Removes fcf from current list */
diff --git a/drivers/scsi/libfc/fc_exch.c b/drivers/scsi/libfc/fc_exch.c
index 841000445b9a1..26d661ddc9509 100644
--- a/drivers/scsi/libfc/fc_exch.c
+++ b/drivers/scsi/libfc/fc_exch.c
@@ -825,10 +825,10 @@ static struct fc_exch *fc_exch_em_alloc(struct fc_lport *lport,
}
memset(ep, 0, sizeof(*ep));
- cpu = get_cpu();
+ cpu = get_cpu_light();
pool = per_cpu_ptr(mp->pool, cpu);
spin_lock_bh(&pool->lock);
- put_cpu();
+ put_cpu_light();
/* peek cache of free slot */
if (pool->left != FC_XID_UNKNOWN) {
diff --git a/drivers/tty/serial/8250/8250.h b/drivers/tty/serial/8250/8250.h
index 6473361525d1f..2321d02e9b7a9 100644
--- a/drivers/tty/serial/8250/8250.h
+++ b/drivers/tty/serial/8250/8250.h
@@ -132,12 +132,55 @@ static inline void serial_dl_write(struct uart_8250_port *up, int value)
up->dl_write(up, value);
}
+static inline void serial8250_set_IER(struct uart_8250_port *up,
+ unsigned char ier)
+{
+ struct uart_port *port = &up->port;
+ unsigned long flags;
+ bool is_console;
+
+ is_console = uart_console(port);
+
+ if (is_console)
+ console_atomic_lock(flags);
+
+ serial_out(up, UART_IER, ier);
+
+ if (is_console)
+ console_atomic_unlock(flags);
+}
+
+static inline unsigned char serial8250_clear_IER(struct uart_8250_port *up)
+{
+ struct uart_port *port = &up->port;
+ unsigned int clearval = 0;
+ unsigned long flags;
+ unsigned int prior;
+ bool is_console;
+
+ is_console = uart_console(port);
+
+ if (up->capabilities & UART_CAP_UUE)
+ clearval = UART_IER_UUE;
+
+ if (is_console)
+ console_atomic_lock(flags);
+
+ prior = serial_port_in(port, UART_IER);
+ serial_port_out(port, UART_IER, clearval);
+
+ if (is_console)
+ console_atomic_unlock(flags);
+
+ return prior;
+}
+
static inline bool serial8250_set_THRI(struct uart_8250_port *up)
{
if (up->ier & UART_IER_THRI)
return false;
up->ier |= UART_IER_THRI;
- serial_out(up, UART_IER, up->ier);
+ serial8250_set_IER(up, up->ier);
return true;
}
@@ -146,7 +189,7 @@ static inline bool serial8250_clear_THRI(struct uart_8250_port *up)
if (!(up->ier & UART_IER_THRI))
return false;
up->ier &= ~UART_IER_THRI;
- serial_out(up, UART_IER, up->ier);
+ serial8250_set_IER(up, up->ier);
return true;
}
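Note: serial8250_set_IER()/serial8250_clear_IER() wrap every UART_IER
access of a console port in console_atomic_lock(), so the atomic
console path added below can never observe a half-updated IER. The same
pattern is open-coded in the fsl, ingenic and mtk drivers further down;
condensed:

        unsigned long flags;
        bool is_console = uart_console(port);

        if (is_console)
                console_atomic_lock(flags);
        /* read-modify-write UART_IER */
        if (is_console)
                console_atomic_unlock(flags);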
diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c
index 1ce193daea7f1..fad00c0414e34 100644
--- a/drivers/tty/serial/8250/8250_core.c
+++ b/drivers/tty/serial/8250/8250_core.c
@@ -264,10 +264,8 @@ static void serial8250_backup_timeout(struct timer_list *t)
* Must disable interrupts or else we risk racing with the interrupt
* based handler.
*/
- if (up->port.irq) {
- ier = serial_in(up, UART_IER);
- serial_out(up, UART_IER, 0);
- }
+ if (up->port.irq)
+ ier = serial8250_clear_IER(up);
iir = serial_in(up, UART_IIR);
@@ -290,7 +288,7 @@ static void serial8250_backup_timeout(struct timer_list *t)
serial8250_tx_chars(up);
if (up->port.irq)
- serial_out(up, UART_IER, ier);
+ serial8250_set_IER(up, ier);
spin_unlock_irqrestore(&up->port.lock, flags);
@@ -568,6 +566,14 @@ serial8250_register_ports(struct uart_driver *drv, struct device *dev)
#ifdef CONFIG_SERIAL_8250_CONSOLE
+static void univ8250_console_write_atomic(struct console *co, const char *s,
+ unsigned int count)
+{
+ struct uart_8250_port *up = &serial8250_ports[co->index];
+
+ serial8250_console_write_atomic(up, s, count);
+}
+
static void univ8250_console_write(struct console *co, const char *s,
unsigned int count)
{
@@ -661,6 +667,7 @@ static int univ8250_console_match(struct console *co, char *name, int idx,
static struct console univ8250_console = {
.name = "ttyS",
+ .write_atomic = univ8250_console_write_atomic,
.write = univ8250_console_write,
.device = uart_console_device,
.setup = univ8250_console_setup,
diff --git a/drivers/tty/serial/8250/8250_fsl.c b/drivers/tty/serial/8250/8250_fsl.c
index 9c01c531349df..ea06cb67cf58f 100644
--- a/drivers/tty/serial/8250/8250_fsl.c
+++ b/drivers/tty/serial/8250/8250_fsl.c
@@ -56,9 +56,18 @@ int fsl8250_handle_irq(struct uart_port *port)
/* Stop processing interrupts on input overrun */
if ((orig_lsr & UART_LSR_OE) && (up->overrun_backoff_time_ms > 0)) {
+ unsigned long flags;
unsigned long delay;
+ bool is_console;
+ is_console = uart_console(port);
+
+ if (is_console)
+ console_atomic_lock(flags);
up->ier = port->serial_in(port, UART_IER);
+ if (is_console)
+ console_atomic_unlock(flags);
+
if (up->ier & (UART_IER_RLSI | UART_IER_RDI)) {
port->ops->stop_rx(port);
} else {
diff --git a/drivers/tty/serial/8250/8250_ingenic.c b/drivers/tty/serial/8250/8250_ingenic.c
index 65402d05eff93..8122645ab05c9 100644
--- a/drivers/tty/serial/8250/8250_ingenic.c
+++ b/drivers/tty/serial/8250/8250_ingenic.c
@@ -146,6 +146,8 @@ OF_EARLYCON_DECLARE(x1000_uart, "ingenic,x1000-uart",
static void ingenic_uart_serial_out(struct uart_port *p, int offset, int value)
{
+ unsigned long flags;
+ bool is_console;
int ier;
switch (offset) {
@@ -167,7 +169,12 @@ static void ingenic_uart_serial_out(struct uart_port *p, int offset, int value)
* If we have enabled modem status IRQs we should enable
* modem mode.
*/
+ is_console = uart_console(p);
+ if (is_console)
+ console_atomic_lock(flags);
ier = p->serial_in(p, UART_IER);
+ if (is_console)
+ console_atomic_unlock(flags);
if (ier & UART_IER_MSI)
value |= UART_MCR_MDCE | UART_MCR_FCM;
diff --git a/drivers/tty/serial/8250/8250_mtk.c b/drivers/tty/serial/8250/8250_mtk.c
index fb65dc601b237..9af18b5d8296f 100644
--- a/drivers/tty/serial/8250/8250_mtk.c
+++ b/drivers/tty/serial/8250/8250_mtk.c
@@ -218,12 +218,37 @@ static void mtk8250_shutdown(struct uart_port *port)
static void mtk8250_disable_intrs(struct uart_8250_port *up, int mask)
{
- serial_out(up, UART_IER, serial_in(up, UART_IER) & (~mask));
+ struct uart_port *port = &up->port;
+ unsigned long flags;
+ unsigned int ier;
+ bool is_console;
+
+ is_console = uart_console(port);
+
+ if (is_console)
+ console_atomic_lock(flags);
+
+ ier = serial_in(up, UART_IER);
+ serial_out(up, UART_IER, ier & (~mask));
+
+ if (is_console)
+ console_atomic_unlock(flags);
}
static void mtk8250_enable_intrs(struct uart_8250_port *up, int mask)
{
- serial_out(up, UART_IER, serial_in(up, UART_IER) | mask);
+ struct uart_port *port = &up->port;
+ unsigned long flags;
+ unsigned int ier;
+
+ if (uart_console(port))
+ console_atomic_lock(flags);
+
+ ier = serial_in(up, UART_IER);
+ serial_out(up, UART_IER, ier | mask);
+
+ if (uart_console(port))
+ console_atomic_unlock(flags);
}
static void mtk8250_set_flow_ctrl(struct uart_8250_port *up, int mode)
diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c
index 5775cbff8f6eb..650de210f430e 100644
--- a/drivers/tty/serial/8250/8250_port.c
+++ b/drivers/tty/serial/8250/8250_port.c
@@ -762,7 +762,7 @@ static void serial8250_set_sleep(struct uart_8250_port *p, int sleep)
serial_out(p, UART_EFR, UART_EFR_ECB);
serial_out(p, UART_LCR, 0);
}
- serial_out(p, UART_IER, sleep ? UART_IERX_SLEEP : 0);
+ serial8250_set_IER(p, sleep ? UART_IERX_SLEEP : 0);
if (p->capabilities & UART_CAP_EFR) {
serial_out(p, UART_LCR, UART_LCR_CONF_MODE_B);
serial_out(p, UART_EFR, efr);
@@ -1436,7 +1436,7 @@ static void serial8250_stop_rx(struct uart_port *port)
up->ier &= ~(UART_IER_RLSI | UART_IER_RDI);
up->port.read_status_mask &= ~UART_LSR_DR;
- serial_port_out(port, UART_IER, up->ier);
+ serial8250_set_IER(up, up->ier);
serial8250_rpm_put(up);
}
@@ -1466,7 +1466,7 @@ void serial8250_em485_stop_tx(struct uart_8250_port *p)
serial8250_clear_and_reinit_fifos(p);
p->ier |= UART_IER_RLSI | UART_IER_RDI;
- serial_port_out(&p->port, UART_IER, p->ier);
+ serial8250_set_IER(p, p->ier);
}
}
EXPORT_SYMBOL_GPL(serial8250_em485_stop_tx);
@@ -1688,7 +1688,7 @@ static void serial8250_disable_ms(struct uart_port *port)
mctrl_gpio_disable_ms(up->gpios);
up->ier &= ~UART_IER_MSI;
- serial_port_out(port, UART_IER, up->ier);
+ serial8250_set_IER(up, up->ier);
}
static void serial8250_enable_ms(struct uart_port *port)
@@ -1704,7 +1704,7 @@ static void serial8250_enable_ms(struct uart_port *port)
up->ier |= UART_IER_MSI;
serial8250_rpm_get(up);
- serial_port_out(port, UART_IER, up->ier);
+ serial8250_set_IER(up, up->ier);
serial8250_rpm_put(up);
}
@@ -2132,14 +2132,7 @@ static void serial8250_put_poll_char(struct uart_port *port,
struct uart_8250_port *up = up_to_u8250p(port);
serial8250_rpm_get(up);
- /*
- * First save the IER then disable the interrupts
- */
- ier = serial_port_in(port, UART_IER);
- if (up->capabilities & UART_CAP_UUE)
- serial_port_out(port, UART_IER, UART_IER_UUE);
- else
- serial_port_out(port, UART_IER, 0);
+ ier = serial8250_clear_IER(up);
wait_for_xmitr(up, BOTH_EMPTY);
/*
@@ -2152,7 +2145,7 @@ static void serial8250_put_poll_char(struct uart_port *port,
* and restore the IER
*/
wait_for_xmitr(up, BOTH_EMPTY);
- serial_port_out(port, UART_IER, ier);
+ serial8250_set_IER(up, ier);
serial8250_rpm_put(up);
}
@@ -2455,7 +2448,7 @@ void serial8250_do_shutdown(struct uart_port *port)
*/
spin_lock_irqsave(&port->lock, flags);
up->ier = 0;
- serial_port_out(port, UART_IER, 0);
+ serial8250_set_IER(up, 0);
spin_unlock_irqrestore(&port->lock, flags);
synchronize_irq(port->irq);
@@ -2837,7 +2830,7 @@ serial8250_do_set_termios(struct uart_port *port, struct ktermios *termios,
if (up->capabilities & UART_CAP_RTOIE)
up->ier |= UART_IER_RTOIE;
- serial_port_out(port, UART_IER, up->ier);
+ serial8250_set_IER(up, up->ier);
if (up->capabilities & UART_CAP_EFR) {
unsigned char efr = 0;
@@ -3303,7 +3296,7 @@ EXPORT_SYMBOL_GPL(serial8250_set_defaults);
#ifdef CONFIG_SERIAL_8250_CONSOLE
-static void serial8250_console_putchar(struct uart_port *port, int ch)
+static void serial8250_console_putchar_locked(struct uart_port *port, int ch)
{
struct uart_8250_port *up = up_to_u8250p(port);
@@ -3311,6 +3304,18 @@ static void serial8250_console_putchar(struct uart_port *port, int ch)
serial_port_out(port, UART_TX, ch);
}
+static void serial8250_console_putchar(struct uart_port *port, int ch)
+{
+ struct uart_8250_port *up = up_to_u8250p(port);
+ unsigned long flags;
+
+ wait_for_xmitr(up, UART_LSR_THRE);
+
+ console_atomic_lock(flags);
+ serial8250_console_putchar_locked(port, ch);
+ console_atomic_unlock(flags);
+}
+
/*
* Restore serial console when h/w power-off detected
*/
@@ -3332,6 +3337,32 @@ static void serial8250_console_restore(struct uart_8250_port *up)
serial8250_out_MCR(up, UART_MCR_DTR | UART_MCR_RTS);
}
+void serial8250_console_write_atomic(struct uart_8250_port *up,
+ const char *s, unsigned int count)
+{
+ struct uart_port *port = &up->port;
+ unsigned long flags;
+ unsigned int ier;
+
+ console_atomic_lock(flags);
+
+ touch_nmi_watchdog();
+
+ ier = serial8250_clear_IER(up);
+
+ if (atomic_fetch_inc(&up->console_printing)) {
+ uart_console_write(port, "\n", 1,
+ serial8250_console_putchar_locked);
+ }
+ uart_console_write(port, s, count, serial8250_console_putchar_locked);
+ atomic_dec(&up->console_printing);
+
+ wait_for_xmitr(up, BOTH_EMPTY);
+ serial8250_set_IER(up, ier);
+
+ console_atomic_unlock(flags);
+}
+
/*
* Print a string to the serial port trying not to disturb
* any possible real use of the port...
@@ -3348,24 +3379,12 @@ void serial8250_console_write(struct uart_8250_port *up, const char *s,
struct uart_port *port = &up->port;
unsigned long flags;
unsigned int ier;
- int locked = 1;
touch_nmi_watchdog();
- if (oops_in_progress)
- locked = spin_trylock_irqsave(&port->lock, flags);
- else
- spin_lock_irqsave(&port->lock, flags);
+ spin_lock_irqsave(&port->lock, flags);
- /*
- * First save the IER then disable the interrupts
- */
- ier = serial_port_in(port, UART_IER);
-
- if (up->capabilities & UART_CAP_UUE)
- serial_port_out(port, UART_IER, UART_IER_UUE);
- else
- serial_port_out(port, UART_IER, 0);
+ ier = serial8250_clear_IER(up);
/* check scratch reg to see if port powered off during system sleep */
if (up->canary && (up->canary != serial_port_in(port, UART_SCR))) {
@@ -3379,7 +3398,9 @@ void serial8250_console_write(struct uart_8250_port *up, const char *s,
mdelay(port->rs485.delay_rts_before_send);
}
+ atomic_inc(&up->console_printing);
uart_console_write(port, s, count, serial8250_console_putchar);
+ atomic_dec(&up->console_printing);
/*
* Finally, wait for transmitter to become empty
@@ -3392,8 +3413,7 @@ void serial8250_console_write(struct uart_8250_port *up, const char *s,
if (em485->tx_stopped)
up->rs485_stop_tx(up);
}
-
- serial_port_out(port, UART_IER, ier);
+ serial8250_set_IER(up, ier);
/*
* The receive handling will happen properly because the
@@ -3405,8 +3425,7 @@ void serial8250_console_write(struct uart_8250_port *up, const char *s,
if (up->msr_saved_flags)
serial8250_modem_status(up);
- if (locked)
- spin_unlock_irqrestore(&port->lock, flags);
+ spin_unlock_irqrestore(&port->lock, flags);
}
static unsigned int probe_baud(struct uart_port *port)
@@ -3426,6 +3445,7 @@ static unsigned int probe_baud(struct uart_port *port)
int serial8250_console_setup(struct uart_port *port, char *options, bool probe)
{
+ struct uart_8250_port *up = up_to_u8250p(port);
int baud = 9600;
int bits = 8;
int parity = 'n';
@@ -3435,6 +3455,8 @@ int serial8250_console_setup(struct uart_port *port, char *options, bool probe)
if (!port->iobase && !port->membase)
return -ENODEV;
+ atomic_set(&up->console_printing, 0);
+
if (options)
uart_parse_options(options, &baud, &parity, &bits, &flow);
else if (probe)
diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c
index d361cd84ff8cf..e1029c108e8ec 100644
--- a/drivers/tty/serial/amba-pl011.c
+++ b/drivers/tty/serial/amba-pl011.c
@@ -2336,18 +2336,24 @@ pl011_console_write(struct console *co, const char *s, unsigned int count)
{
struct uart_amba_port *uap = amba_ports[co->index];
unsigned int old_cr = 0, new_cr;
- unsigned long flags;
+ unsigned long flags = 0;
int locked = 1;
clk_enable(uap->clk);
- local_irq_save(flags);
+ /*
+ * local_irq_save(flags);
+ *
+ * This local_irq_save() is nonsense. If we come in via sysrq
+ * handling then interrupts are already disabled. Aside from
+ * that, the port.sysrq check is racy on SMP regardless.
+ */
if (uap->port.sysrq)
locked = 0;
else if (oops_in_progress)
- locked = spin_trylock(&uap->port.lock);
+ locked = spin_trylock_irqsave(&uap->port.lock, flags);
else
- spin_lock(&uap->port.lock);
+ spin_lock_irqsave(&uap->port.lock, flags);
/*
* First save the CR then disable the interrupts
@@ -2373,8 +2379,7 @@ pl011_console_write(struct console *co, const char *s, unsigned int count)
pl011_write(old_cr, uap, REG_CR);
if (locked)
- spin_unlock(&uap->port.lock);
- local_irq_restore(flags);
+ spin_unlock_irqrestore(&uap->port.lock, flags);
clk_disable(uap->clk);
}
diff --git a/drivers/tty/serial/omap-serial.c b/drivers/tty/serial/omap-serial.c
index 0862941862c8b..10970632f0e47 100644
--- a/drivers/tty/serial/omap-serial.c
+++ b/drivers/tty/serial/omap-serial.c
@@ -1255,13 +1255,10 @@ serial_omap_console_write(struct console *co, const char *s,
unsigned int ier;
int locked = 1;
- local_irq_save(flags);
- if (up->port.sysrq)
- locked = 0;
- else if (oops_in_progress)
- locked = spin_trylock(&up->port.lock);
+ if (up->port.sysrq || oops_in_progress)
+ locked = spin_trylock_irqsave(&up->port.lock, flags);
else
- spin_lock(&up->port.lock);
+ spin_lock_irqsave(&up->port.lock, flags);
/*
* First save the IER then disable the interrupts
@@ -1288,8 +1285,7 @@ serial_omap_console_write(struct console *co, const char *s,
check_modem_status(up);
if (locked)
- spin_unlock(&up->port.lock);
- local_irq_restore(flags);
+ spin_unlock_irqrestore(&up->port.lock, flags);
}
static int __init
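Note: the pl011 and omap console write paths drop the unconditional
local_irq_save() in favour of the _irqsave spinlock variants, so
interrupts are only disabled once the lock is (tentatively) taken.
Reduced to its core, the fixed shape is:

        unsigned long flags;
        int locked = 1;

        if (oops_in_progress)
                locked = spin_trylock_irqsave(&port->lock, flags);
        else
                spin_lock_irqsave(&port->lock, flags);

        /* ... emit the characters ... */

        if (locked)
                spin_unlock_irqrestore(&port->lock, flags);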
diff --git a/drivers/virt/acrn/irqfd.c b/drivers/virt/acrn/irqfd.c
index df5184979b282..d4ad211dce7a3 100644
--- a/drivers/virt/acrn/irqfd.c
+++ b/drivers/virt/acrn/irqfd.c
@@ -17,7 +17,6 @@
#include "acrn_drv.h"
static LIST_HEAD(acrn_irqfd_clients);
-static DEFINE_MUTEX(acrn_irqfds_mutex);
/**
* struct hsm_irqfd - Properties of HSM irqfd
diff --git a/fs/afs/dir_silly.c b/fs/afs/dir_silly.c
index 45cfd50a95210..502b56597f107 100644
--- a/fs/afs/dir_silly.c
+++ b/fs/afs/dir_silly.c
@@ -239,7 +239,7 @@ int afs_silly_iput(struct dentry *dentry, struct inode *inode)
struct dentry *alias;
int ret;
- DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
+ DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
_enter("%p{%pd},%llx", dentry, dentry, vnode->fid.vnode);
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 1929e80c09ee1..48eb8c30c6dbf 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -69,7 +69,7 @@ cifs_prime_dcache(struct dentry *parent, struct qstr *name,
struct inode *inode;
struct super_block *sb = parent->d_sb;
struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
- DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
+ DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
cifs_dbg(FYI, "%s: for %s\n", __func__, name->name);
diff --git a/fs/dcache.c b/fs/dcache.c
index cf871a81f4fdc..02db80f2817fc 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2537,7 +2537,13 @@ EXPORT_SYMBOL(d_rehash);
static inline unsigned start_dir_add(struct inode *dir)
{
-
+ /*
+ * The caller holds a spinlock_t (dentry::d_lock), which disables
+ * preemption on !PREEMPT_RT. On PREEMPT_RT the lock does not disable
+ * preemption, so it has to be done explicitly.
+ */
+ if (IS_ENABLED(CONFIG_PREEMPT_RT))
+ preempt_disable();
for (;;) {
unsigned n = dir->i_dir_seq;
if (!(n & 1) && cmpxchg(&dir->i_dir_seq, n, n + 1) == n)
@@ -2549,25 +2555,30 @@ static inline unsigned start_dir_add(struct inode *dir)
static inline void end_dir_add(struct inode *dir, unsigned n)
{
smp_store_release(&dir->i_dir_seq, n + 2);
+ if (IS_ENABLED(CONFIG_PREEMPT_RT))
+ preempt_enable();
}
static void d_wait_lookup(struct dentry *dentry)
{
- if (d_in_lookup(dentry)) {
- DECLARE_WAITQUEUE(wait, current);
- add_wait_queue(dentry->d_wait, &wait);
- do {
- set_current_state(TASK_UNINTERRUPTIBLE);
- spin_unlock(&dentry->d_lock);
- schedule();
- spin_lock(&dentry->d_lock);
- } while (d_in_lookup(dentry));
- }
+ struct swait_queue __wait;
+
+ if (!d_in_lookup(dentry))
+ return;
+
+ INIT_LIST_HEAD(&__wait.task_list);
+ do {
+ prepare_to_swait_exclusive(dentry->d_wait, &__wait, TASK_UNINTERRUPTIBLE);
+ spin_unlock(&dentry->d_lock);
+ schedule();
+ spin_lock(&dentry->d_lock);
+ } while (d_in_lookup(dentry));
+ finish_swait(dentry->d_wait, &__wait);
}
struct dentry *d_alloc_parallel(struct dentry *parent,
const struct qstr *name,
- wait_queue_head_t *wq)
+ struct swait_queue_head *wq)
{
unsigned int hash = name->hash;
struct hlist_bl_head *b = in_lookup_hash(parent, hash);
@@ -2682,7 +2693,7 @@ void __d_lookup_done(struct dentry *dentry)
hlist_bl_lock(b);
dentry->d_flags &= ~DCACHE_PAR_LOOKUP;
__hlist_bl_del(&dentry->d_u.d_in_lookup_hash);
- wake_up_all(dentry->d_wait);
+ swake_up_all(dentry->d_wait);
dentry->d_wait = NULL;
hlist_bl_unlock(b);
INIT_HLIST_NODE(&dentry->d_u.d_alias);
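Note: the dcache in-lookup wait queue becomes a swait queue, so waking
it is cheap and RT-safe. For d_alloc_parallel() callers, which the
hunks below convert one by one, the change is mechanical:

        DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
        struct dentry *child;

        child = d_alloc_parallel(parent, &qname, &wq);
        if (IS_ERR(child))
                return PTR_ERR(child);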
diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h
index c3e4804b8fcbf..9edb87e11680b 100644
--- a/fs/fscache/internal.h
+++ b/fs/fscache/internal.h
@@ -81,7 +81,6 @@ extern unsigned fscache_debug;
extern struct kobject *fscache_root;
extern struct workqueue_struct *fscache_object_wq;
extern struct workqueue_struct *fscache_op_wq;
-DECLARE_PER_CPU(wait_queue_head_t, fscache_object_cong_wait);
extern unsigned int fscache_hash(unsigned int salt, unsigned int *data, unsigned int n);
diff --git a/fs/fscache/main.c b/fs/fscache/main.c
index 4207f98e405fd..85f8cf3a323d5 100644
--- a/fs/fscache/main.c
+++ b/fs/fscache/main.c
@@ -41,8 +41,6 @@ struct kobject *fscache_root;
struct workqueue_struct *fscache_object_wq;
struct workqueue_struct *fscache_op_wq;
-DEFINE_PER_CPU(wait_queue_head_t, fscache_object_cong_wait);
-
/* these values serve as lower bounds, will be adjusted in fscache_init() */
static unsigned fscache_object_max_active = 4;
static unsigned fscache_op_max_active = 2;
@@ -138,7 +136,6 @@ unsigned int fscache_hash(unsigned int salt, unsigned int *data, unsigned int n)
static int __init fscache_init(void)
{
unsigned int nr_cpus = num_possible_cpus();
- unsigned int cpu;
int ret;
fscache_object_max_active =
@@ -161,9 +158,6 @@ static int __init fscache_init(void)
if (!fscache_op_wq)
goto error_op_wq;
- for_each_possible_cpu(cpu)
- init_waitqueue_head(&per_cpu(fscache_object_cong_wait, cpu));
-
ret = fscache_proc_init();
if (ret < 0)
goto error_proc;
diff --git a/fs/fscache/object.c b/fs/fscache/object.c
index 6a675652129b2..7a972d144b546 100644
--- a/fs/fscache/object.c
+++ b/fs/fscache/object.c
@@ -798,6 +798,8 @@ void fscache_object_destroy(struct fscache_object *object)
}
EXPORT_SYMBOL(fscache_object_destroy);
+static DECLARE_WAIT_QUEUE_HEAD(fscache_object_cong_wait);
+
/*
* enqueue an object for metadata-type processing
*/
@@ -806,16 +808,12 @@ void fscache_enqueue_object(struct fscache_object *object)
_enter("{OBJ%x}", object->debug_id);
if (fscache_get_object(object, fscache_obj_get_queue) >= 0) {
- wait_queue_head_t *cong_wq =
- &get_cpu_var(fscache_object_cong_wait);
if (queue_work(fscache_object_wq, &object->work)) {
if (fscache_object_congested())
- wake_up(cong_wq);
+ wake_up(&fscache_object_cong_wait);
} else
fscache_put_object(object, fscache_obj_put_queue);
-
- put_cpu_var(fscache_object_cong_wait);
}
}
@@ -833,16 +831,15 @@ void fscache_enqueue_object(struct fscache_object *object)
*/
bool fscache_object_sleep_till_congested(signed long *timeoutp)
{
- wait_queue_head_t *cong_wq = this_cpu_ptr(&fscache_object_cong_wait);
DEFINE_WAIT(wait);
if (fscache_object_congested())
return true;
- add_wait_queue_exclusive(cong_wq, &wait);
+ add_wait_queue_exclusive(&fscache_object_cong_wait, &wait);
if (!fscache_object_congested())
*timeoutp = schedule_timeout(*timeoutp);
- finish_wait(cong_wq, &wait);
+ finish_wait(&fscache_object_cong_wait, &wait);
return fscache_object_congested();
}
diff --git a/fs/fuse/readdir.c b/fs/fuse/readdir.c
index b4e5657110457..5ef0c106fb9db 100644
--- a/fs/fuse/readdir.c
+++ b/fs/fuse/readdir.c
@@ -158,7 +158,7 @@ static int fuse_direntplus_link(struct file *file,
struct inode *dir = d_inode(parent);
struct fuse_conn *fc;
struct inode *inode;
- DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
+ DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
if (!o->nodeid) {
/*
diff --git a/fs/namei.c b/fs/namei.c
index 1f9d2187c7655..49552c066ce57 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1633,7 +1633,7 @@ static struct dentry *__lookup_slow(const struct qstr *name,
{
struct dentry *dentry, *old;
struct inode *inode = dir->d_inode;
- DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
+ DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
/* Don't go there if it's already dead */
if (unlikely(IS_DEADDIR(inode)))
@@ -3192,7 +3192,7 @@ static struct dentry *lookup_open(struct nameidata *nd, struct file *file,
struct dentry *dentry;
int error, create_error = 0;
umode_t mode = op->mode;
- DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
+ DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
if (unlikely(IS_DEADDIR(dir_inode)))
return ERR_PTR(-ENOENT);
diff --git a/fs/namespace.c b/fs/namespace.c
index 659a8f39c61af..3ab45b47b2860 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -343,8 +343,24 @@ int __mnt_want_write(struct vfsmount *m)
* incremented count after it has set MNT_WRITE_HOLD.
*/
smp_mb();
- while (READ_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD)
- cpu_relax();
+ might_lock(&mount_lock.lock);
+ while (READ_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD) {
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) {
+ cpu_relax();
+ } else {
+ /*
+ * This prevents priority inversion if the task setting
+ * MNT_WRITE_HOLD was preempted on a remote CPU, and it
+ * prevents livelock if that task has a lower priority
+ * and is bound to the same CPU as the task spinning here.
+ */
+ preempt_enable();
+ lock_mount_hash();
+ unlock_mount_hash();
+ preempt_disable();
+ }
+ }
/*
* After the slowpath clears MNT_WRITE_HOLD, mnt_is_readonly will
* be set to match its requirements. So we must not load that until
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 731d31015b6aa..d7c2571391b79 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -638,7 +638,7 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry,
unsigned long dir_verifier)
{
struct qstr filename = QSTR_INIT(entry->name, entry->len);
- DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
+ DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
struct dentry *dentry;
struct dentry *alias;
struct inode *inode;
@@ -1860,7 +1860,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
struct file *file, unsigned open_flags,
umode_t mode)
{
- DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
+ DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
struct nfs_open_context *ctx;
struct dentry *res;
struct iattr attr = { .ia_valid = ATTR_OPEN };
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 5fa11e1aca4c2..984f26eb888c1 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -13,7 +13,7 @@
#include <linux/sunrpc/clnt.h>
#include <linux/nfs_fs.h>
#include <linux/sched.h>
-#include <linux/wait.h>
+#include <linux/swait.h>
#include <linux/namei.h>
#include <linux/fsnotify.h>
@@ -180,7 +180,7 @@ nfs_async_unlink(struct dentry *dentry, const struct qstr *name)
data->cred = get_current_cred();
data->res.dir_attr = &data->dir_attr;
- init_waitqueue_head(&data->wq);
+ init_swait_queue_head(&data->wq);
status = -EBUSY;
spin_lock(&dentry->d_lock);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 13eda8de29981..a68c26e908812 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -96,6 +96,7 @@
#include <linux/posix-timers.h>
#include <linux/time_namespace.h>
#include <linux/resctrl.h>
+#include <linux/swait.h>
#include <linux/cn_proc.h>
#include <trace/events/oom.h>
#include "internal.h"
@@ -2045,7 +2046,7 @@ bool proc_fill_cache(struct file *file, struct dir_context *ctx,
child = d_hash_and_lookup(dir, &qname);
if (!child) {
- DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
+ DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
child = d_alloc_parallel(dir, &qname, &wq);
if (IS_ERR(child))
goto end_instantiate;
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 5d66faecd4ef0..619d8e114646d 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -678,7 +678,7 @@ static bool proc_sys_fill_cache(struct file *file,
child = d_lookup(dir, &qname);
if (!child) {
- DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
+ DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
child = d_alloc_parallel(dir, &qname, &wq);
if (IS_ERR(child))
return false;
diff --git a/include/asm-generic/softirq_stack.h b/include/asm-generic/softirq_stack.h
index eceeecf6a5bd8..d3e2d81656e04 100644
--- a/include/asm-generic/softirq_stack.h
+++ b/include/asm-generic/softirq_stack.h
@@ -2,7 +2,7 @@
#ifndef __ASM_GENERIC_SOFTIRQ_STACK_H
#define __ASM_GENERIC_SOFTIRQ_STACK_H
-#ifdef CONFIG_HAVE_SOFTIRQ_ON_OWN_STACK
+#if defined(CONFIG_HAVE_SOFTIRQ_ON_OWN_STACK) && !defined(CONFIG_PREEMPT_RT)
void do_softirq_own_stack(void);
#else
static inline void do_softirq_own_stack(void)
diff --git a/include/linux/console.h b/include/linux/console.h
index a97f277cfdfa3..606fdf91001f0 100644
--- a/include/linux/console.h
+++ b/include/linux/console.h
@@ -16,6 +16,13 @@
#include <linux/atomic.h>
#include <linux/types.h>
+#include <linux/printk.h>
+#include <linux/seqlock.h>
+
+struct latched_seq {
+ seqcount_latch_t latch;
+ u64 val[2];
+};
struct vc_data;
struct console_font_op;
@@ -136,10 +143,12 @@ static inline int con_debug_leave(void)
#define CON_ANYTIME (16) /* Safe to call when cpu is offline */
#define CON_BRL (32) /* Used for a braille device */
#define CON_EXTENDED (64) /* Use the extended output format a la /dev/kmsg */
+#define CON_HANDOVER (128) /* Device was previously a boot console. */
struct console {
char name[16];
void (*write)(struct console *, const char *, unsigned);
+ void (*write_atomic)(struct console *co, const char *s, unsigned int count);
int (*read)(struct console *, char *, unsigned);
struct tty_driver *(*device)(struct console *, int *);
void (*unblank)(void);
@@ -149,8 +158,17 @@ struct console {
short flags;
short index;
int cflag;
+#ifdef CONFIG_PRINTK
+ char sync_buf[CONSOLE_LOG_MAX];
+ struct latched_seq printk_seq;
+ struct latched_seq printk_sync_seq;
+#ifdef CONFIG_HAVE_NMI
+ struct latched_seq printk_sync_nmi_seq;
+#endif
+#endif /* CONFIG_PRINTK */
uint ispeed;
uint ospeed;
+ struct task_struct *thread;
void *data;
struct console *next;
};
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 9e23d33bb6f1c..9f89d4887e35d 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -108,7 +108,7 @@ struct dentry {
union {
struct list_head d_lru; /* LRU list */
- wait_queue_head_t *d_wait; /* in-lookup ones only */
+ struct swait_queue_head *d_wait; /* in-lookup ones only */
};
struct list_head d_child; /* child of parent list */
struct list_head d_subdirs; /* our children */
@@ -240,7 +240,7 @@ extern void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op
extern struct dentry * d_alloc(struct dentry *, const struct qstr *);
extern struct dentry * d_alloc_anon(struct super_block *);
extern struct dentry * d_alloc_parallel(struct dentry *, const struct qstr *,
- wait_queue_head_t *);
+ struct swait_queue_head *);
extern struct dentry * d_splice_alias(struct inode *, struct dentry *);
extern struct dentry * d_add_ci(struct dentry *, struct inode *, struct qstr *);
extern struct dentry * d_exact_alias(struct dentry *, struct inode *);
diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h
index 2e2b8d6140ed4..71064a2c2cafe 100644
--- a/include/linux/entry-common.h
+++ b/include/linux/entry-common.h
@@ -57,9 +57,15 @@
# define ARCH_EXIT_TO_USER_MODE_WORK (0)
#endif
+#ifdef CONFIG_PREEMPT_LAZY
+# define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)
+#else
+# define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED)
+#endif
+
#define EXIT_TO_USER_MODE_WORK \
(_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \
- _TIF_NEED_RESCHED | _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL | \
+ _TIF_NEED_RESCHED_MASK | _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL | \
ARCH_EXIT_TO_USER_MODE_WORK)
/**
diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index 93d270ca0c567..5cf06e8287d57 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -68,6 +68,7 @@ struct irq_desc {
unsigned int irqs_unhandled;
atomic_t threads_handled;
int threads_handled_last;
+ u64 random_ip;
raw_spinlock_t lock;
struct cpumask *percpu_enabled;
const struct cpumask *percpu_affinity;
diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h
index 600c10da321a7..4b140938b03e2 100644
--- a/include/linux/irqflags.h
+++ b/include/linux/irqflags.h
@@ -71,14 +71,6 @@ do { \
do { \
__this_cpu_dec(hardirq_context); \
} while (0)
-# define lockdep_softirq_enter() \
-do { \
- current->softirq_context++; \
-} while (0)
-# define lockdep_softirq_exit() \
-do { \
- current->softirq_context--; \
-} while (0)
# define lockdep_hrtimer_enter(__hrtimer) \
({ \
@@ -140,6 +132,21 @@ do { \
# define lockdep_irq_work_exit(__work) do { } while (0)
#endif
+#if defined(CONFIG_TRACE_IRQFLAGS) && !defined(CONFIG_PREEMPT_RT)
+# define lockdep_softirq_enter() \
+do { \
+ current->softirq_context++; \
+} while (0)
+# define lockdep_softirq_exit() \
+do { \
+ current->softirq_context--; \
+} while (0)
+
+#else
+# define lockdep_softirq_enter() do { } while (0)
+# define lockdep_softirq_exit() do { } while (0)
+#endif
+
#if defined(CONFIG_IRQSOFF_TRACER) || \
defined(CONFIG_PREEMPT_TRACER)
extern void stop_critical_timings(void);
diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h
index 258cdde8d356b..9bca0d98db5a1 100644
--- a/include/linux/kgdb.h
+++ b/include/linux/kgdb.h
@@ -212,6 +212,8 @@ extern void kgdb_call_nmi_hook(void *ignored);
*/
extern void kgdb_roundup_cpus(void);
+extern void kgdb_roundup_cpu(unsigned int cpu);
+
/**
* kgdb_arch_set_pc - Generic call back to the program counter
* @regs: Current &struct pt_regs.
@@ -365,5 +367,6 @@ extern void kgdb_free_init_mem(void);
#define dbg_late_init()
static inline void kgdb_panic(const char *msg) {}
static inline void kgdb_free_init_mem(void) { }
+static inline void kgdb_roundup_cpu(unsigned int cpu) {}
#endif /* ! CONFIG_KGDB */
#endif /* _KGDB_H_ */
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 967a0098f0a97..57979c3dc4a7f 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1684,7 +1684,7 @@ struct nfs_unlinkdata {
struct nfs_removeargs args;
struct nfs_removeres res;
struct dentry *dentry;
- wait_queue_head_t wq;
+ struct swait_queue_head wq;
const struct cred *cred;
struct nfs_fattr dir_attr;
long timeout;
diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index b4381f255a5ca..c05c5247986f7 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -196,6 +196,20 @@ extern void preempt_count_sub(int val);
#define preempt_count_inc() preempt_count_add(1)
#define preempt_count_dec() preempt_count_sub(1)
+#ifdef CONFIG_PREEMPT_LAZY
+#define add_preempt_lazy_count(val) do { preempt_lazy_count() += (val); } while (0)
+#define sub_preempt_lazy_count(val) do { preempt_lazy_count() -= (val); } while (0)
+#define inc_preempt_lazy_count() add_preempt_lazy_count(1)
+#define dec_preempt_lazy_count() sub_preempt_lazy_count(1)
+#define preempt_lazy_count() (current_thread_info()->preempt_lazy_count)
+#else
+#define add_preempt_lazy_count(val) do { } while (0)
+#define sub_preempt_lazy_count(val) do { } while (0)
+#define inc_preempt_lazy_count() do { } while (0)
+#define dec_preempt_lazy_count() do { } while (0)
+#define preempt_lazy_count() (0)
+#endif
+
#ifdef CONFIG_PREEMPT_COUNT
#define preempt_disable() \
@@ -204,13 +218,25 @@ do { \
barrier(); \
} while (0)
+#define preempt_lazy_disable() \
+do { \
+ inc_preempt_lazy_count(); \
+ barrier(); \
+} while (0)
+
#define sched_preempt_enable_no_resched() \
do { \
barrier(); \
preempt_count_dec(); \
} while (0)
-#define preempt_enable_no_resched() sched_preempt_enable_no_resched()
+#ifndef CONFIG_PREEMPT_RT
+# define preempt_enable_no_resched() sched_preempt_enable_no_resched()
+# define preempt_check_resched_rt() barrier()
+#else
+# define preempt_enable_no_resched() preempt_enable()
+# define preempt_check_resched_rt() preempt_check_resched()
+#endif
#define preemptible() (preempt_count() == 0 && !irqs_disabled())
@@ -235,6 +261,18 @@ do { \
__preempt_schedule(); \
} while (0)
+/*
+ * Open-code preempt_check_resched() here: it is not exported to modules,
+ * but preempt_lazy_enable() is used by local_unlock() and
+ * bpf_enable_instrumentation().
+ */
+#define preempt_lazy_enable() \
+do { \
+ dec_preempt_lazy_count(); \
+ barrier(); \
+ if (should_resched(0)) \
+ __preempt_schedule(); \
+} while (0)
+
#else /* !CONFIG_PREEMPTION */
#define preempt_enable() \
do { \
@@ -242,6 +280,12 @@ do { \
preempt_count_dec(); \
} while (0)
+#define preempt_lazy_enable() \
+do { \
+ dec_preempt_lazy_count(); \
+ barrier(); \
+} while (0)
+
#define preempt_enable_notrace() \
do { \
barrier(); \
@@ -280,8 +324,12 @@ do { \
#define preempt_disable_notrace() barrier()
#define preempt_enable_no_resched_notrace() barrier()
#define preempt_enable_notrace() barrier()
+#define preempt_check_resched_rt() barrier()
#define preemptible() 0
+#define preempt_lazy_disable() barrier()
+#define preempt_lazy_enable() barrier()
+
#endif /* CONFIG_PREEMPT_COUNT */
#ifdef MODULE
@@ -300,7 +348,7 @@ do { \
} while (0)
#define preempt_fold_need_resched() \
do { \
- if (tif_need_resched()) \
+ if (tif_need_resched_now()) \
set_preempt_need_resched(); \
} while (0)
@@ -416,8 +464,15 @@ extern void migrate_enable(void);
#else
-static inline void migrate_disable(void) { }
-static inline void migrate_enable(void) { }
+static inline void migrate_disable(void)
+{
+ preempt_lazy_disable();
+}
+
+static inline void migrate_enable(void)
+{
+ preempt_lazy_enable();
+}
#endif /* CONFIG_SMP */
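Note: with lazy preemption, the !SMP migrate_disable()/migrate_enable()
stubs map onto the lazy counter, so a migrate-disabled region also
defers rescheduling of SCHED_OTHER tasks. Used directly, the new API is
just:

        preempt_lazy_disable();
        /*
         * Region that must not be preempted by SCHED_OTHER tasks but
         * remains fully preemptible by real-time tasks.
         */
        preempt_lazy_enable();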
diff --git a/include/linux/printk.h b/include/linux/printk.h
index 85b656f82d752..b31aedc7102ac 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -47,6 +47,12 @@ static inline const char *printk_skip_headers(const char *buffer)
#define CONSOLE_EXT_LOG_MAX 8192
+/*
+ * The maximum size of a record formatted for console printing
+ * (i.e. with the prefix prepended to every line).
+ */
+#define CONSOLE_LOG_MAX 1024
+
/* printk's without a loglevel use this.. */
#define MESSAGE_LOGLEVEL_DEFAULT CONFIG_MESSAGE_LOGLEVEL_DEFAULT
@@ -155,20 +161,7 @@ int vprintk(const char *fmt, va_list args);
asmlinkage __printf(1, 2) __cold
int _printk(const char *fmt, ...);
-/*
- * Special printk facility for scheduler/timekeeping use only, _DO_NOT_USE_ !
- */
-__printf(1, 2) __cold int _printk_deferred(const char *fmt, ...);
-
-extern void __printk_safe_enter(void);
-extern void __printk_safe_exit(void);
-/*
- * The printk_deferred_enter/exit macros are available only as a hack for
- * some code paths that need to defer all printk console printing. Interrupts
- * must be disabled for the deferred duration.
- */
-#define printk_deferred_enter __printk_safe_enter
-#define printk_deferred_exit __printk_safe_exit
+bool pr_flush(int timeout_ms, bool reset_on_progress);
/*
* Please don't use printk_ratelimit(), because it shares ratelimiting state
@@ -209,18 +202,10 @@ int _printk(const char *s, ...)
{
return 0;
}
-static inline __printf(1, 2) __cold
-int _printk_deferred(const char *s, ...)
-{
- return 0;
-}
-static inline void printk_deferred_enter(void)
-{
-}
-
-static inline void printk_deferred_exit(void)
+static inline bool pr_flush(int timeout_ms, bool reset_on_progress)
{
+ return true;
}
static inline int printk_ratelimit(void)
@@ -280,17 +265,30 @@ static inline void dump_stack(void)
extern int __printk_cpu_trylock(void);
extern void __printk_wait_on_cpu_lock(void);
extern void __printk_cpu_unlock(void);
+extern bool kgdb_roundup_delay(unsigned int cpu);
+
+#else
+
+#define __printk_cpu_trylock() 1
+#define __printk_wait_on_cpu_lock()
+#define __printk_cpu_unlock()
+
+static inline bool kgdb_roundup_delay(unsigned int cpu)
+{
+ return false;
+}
+#endif /* CONFIG_SMP */
/**
- * printk_cpu_lock_irqsave() - Acquire the printk cpu-reentrant spinning
- * lock and disable interrupts.
+ * raw_printk_cpu_lock_irqsave() - Acquire the printk cpu-reentrant spinning
+ * lock and disable interrupts.
* @flags: Stack-allocated storage for saving local interrupt state,
- * to be passed to printk_cpu_unlock_irqrestore().
+ * to be passed to raw_printk_cpu_unlock_irqrestore().
*
* If the lock is owned by another CPU, spin until it becomes available.
* Interrupts are restored while spinning.
*/
-#define printk_cpu_lock_irqsave(flags) \
+#define raw_printk_cpu_lock_irqsave(flags) \
for (;;) { \
local_irq_save(flags); \
if (__printk_cpu_trylock()) \
@@ -300,22 +298,30 @@ extern void __printk_cpu_unlock(void);
}
/**
- * printk_cpu_unlock_irqrestore() - Release the printk cpu-reentrant spinning
- * lock and restore interrupts.
- * @flags: Caller's saved interrupt state, from printk_cpu_lock_irqsave().
+ * raw_printk_cpu_unlock_irqrestore() - Release the printk cpu-reentrant
+ * spinning lock and restore interrupts.
+ * @flags: Caller's saved interrupt state from raw_printk_cpu_lock_irqsave().
*/
-#define printk_cpu_unlock_irqrestore(flags) \
+#define raw_printk_cpu_unlock_irqrestore(flags) \
do { \
__printk_cpu_unlock(); \
local_irq_restore(flags); \
- } while (0) \
+ } while (0)
-#else
+/*
+ * Used to synchronize atomic consoles.
+ *
+ * The same as raw_printk_cpu_lock_irqsave() except that hardware interrupts
+ * are _not_ restored while spinning.
+ */
+#define console_atomic_lock(flags) \
+ do { \
+ local_irq_save(flags); \
+ while (!__printk_cpu_trylock()) \
+ cpu_relax(); \
+ } while (0)
-#define printk_cpu_lock_irqsave(flags) ((void)flags)
-#define printk_cpu_unlock_irqrestore(flags) ((void)flags)
-
-#endif /* CONFIG_SMP */
+#define console_atomic_unlock raw_printk_cpu_unlock_irqrestore
extern int kptr_restrict;
@@ -444,8 +450,6 @@ struct pi_entry {
* See the vsnprintf() documentation for format string extensions over C99.
*/
#define printk(fmt, ...) printk_index_wrap(_printk, fmt, ##__VA_ARGS__)
-#define printk_deferred(fmt, ...) \
- printk_index_wrap(_printk_deferred, fmt, ##__VA_ARGS__)
/**
* pr_emerg - Print an emergency-level message
@@ -583,13 +587,9 @@ struct pi_entry {
#ifdef CONFIG_PRINTK
#define printk_once(fmt, ...) \
DO_ONCE_LITE(printk, fmt, ##__VA_ARGS__)
-#define printk_deferred_once(fmt, ...) \
- DO_ONCE_LITE(printk_deferred, fmt, ##__VA_ARGS__)
#else
#define printk_once(fmt, ...) \
no_printk(fmt, ##__VA_ARGS__)
-#define printk_deferred_once(fmt, ...) \
- no_printk(fmt, ##__VA_ARGS__)
#endif
#define pr_emerg_once(fmt, ...) \
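Note: console_atomic_lock()/console_atomic_unlock() are the primitives
the serial changes above are built on: they take the cpu-reentrant
printk lock and, unlike raw_printk_cpu_lock_irqsave(), keep interrupts
disabled while spinning. Standalone:

        unsigned long flags;

        console_atomic_lock(flags);
        /* ... poke the console hardware ... */
        console_atomic_unlock(flags);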
diff --git a/include/linux/random.h b/include/linux/random.h
index f45b8be3e3c4e..0e41d05278091 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -35,7 +35,7 @@ static inline void add_latent_entropy(void) {}
extern void add_input_randomness(unsigned int type, unsigned int code,
unsigned int value) __latent_entropy;
-extern void add_interrupt_randomness(int irq, int irq_flags) __latent_entropy;
+extern void add_interrupt_randomness(int irq, int irq_flags, __u64 ip) __latent_entropy;
extern void get_random_bytes(void *buf, int nbytes);
extern int wait_for_random_bytes(void);
diff --git a/include/linux/ratelimit_types.h b/include/linux/ratelimit_types.h
index b676aa419eef8..c21c7f8103e2b 100644
--- a/include/linux/ratelimit_types.h
+++ b/include/linux/ratelimit_types.h
@@ -4,7 +4,7 @@
#include <linux/bits.h>
#include <linux/param.h>
-#include <linux/spinlock_types.h>
+#include <linux/spinlock_types_raw.h>
#define DEFAULT_RATELIMIT_INTERVAL (5 * HZ)
#define DEFAULT_RATELIMIT_BURST 10
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 5e0beb5c5659a..3c61f246966d5 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -95,6 +95,13 @@ void rcu_init_tasks_generic(void);
static inline void rcu_init_tasks_generic(void) { }
#endif
+#if defined(CONFIG_PROVE_RCU) && defined(CONFIG_TASKS_RCU_GENERIC)
+void rcu_tasks_initiate_self_tests(void);
+#else
+static inline void rcu_tasks_initiate_self_tests(void) {}
+#endif
+
+
#ifdef CONFIG_RCU_STALL_COMMON
void rcu_sysrq_start(void);
void rcu_sysrq_end(void);
diff --git a/include/linux/rtmutex.h b/include/linux/rtmutex.h
index 9deedfeec2b17..7d049883a08ac 100644
--- a/include/linux/rtmutex.h
+++ b/include/linux/rtmutex.h
@@ -99,13 +99,22 @@ extern void __rt_mutex_init(struct rt_mutex *lock, const char *name, struct lock
#ifdef CONFIG_DEBUG_LOCK_ALLOC
extern void rt_mutex_lock_nested(struct rt_mutex *lock, unsigned int subclass);
+extern void _rt_mutex_lock_nest_lock(struct rt_mutex *lock, struct lockdep_map *nest_lock);
#define rt_mutex_lock(lock) rt_mutex_lock_nested(lock, 0)
+#define rt_mutex_lock_nest_lock(lock, nest_lock) \
+ do { \
+ typecheck(struct lockdep_map *, &(nest_lock)->dep_map); \
+ _rt_mutex_lock_nest_lock(lock, &(nest_lock)->dep_map); \
+ } while (0)
+
#else
extern void rt_mutex_lock(struct rt_mutex *lock);
#define rt_mutex_lock_nested(lock, subclass) rt_mutex_lock(lock)
+#define rt_mutex_lock_nest_lock(lock, nest_lock) rt_mutex_lock(lock)
#endif
extern int rt_mutex_lock_interruptible(struct rt_mutex *lock);
+extern int rt_mutex_lock_killable(struct rt_mutex *lock);
extern int rt_mutex_trylock(struct rt_mutex *lock);
extern void rt_mutex_unlock(struct rt_mutex *lock);
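A minimal usage sketch for the two rtmutex interfaces added above; the
lock names are made up for illustration:

  static DEFINE_RT_MUTEX(outer_lock);
  static DEFINE_RT_MUTEX(inner_lock);

  static int example(void)
  {
          /* Returns -EINTR if a fatal signal arrives while blocked. */
          if (rt_mutex_lock_killable(&outer_lock))
                  return -EINTR;

          /*
           * Tell lockdep that inner_lock nests under outer_lock, so
           * taking several locks of inner_lock's class in a row does
           * not produce a false-positive deadlock report.
           */
          rt_mutex_lock_nest_lock(&inner_lock, &outer_lock);

          rt_mutex_unlock(&inner_lock);
          rt_mutex_unlock(&outer_lock);
          return 0;
  }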
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 78c351e35fec6..971d20337ad34 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -118,12 +118,8 @@ struct task_group;
#define task_is_running(task) (READ_ONCE((task)->__state) == TASK_RUNNING)
-#define task_is_traced(task) ((READ_ONCE(task->__state) & __TASK_TRACED) != 0)
-
#define task_is_stopped(task) ((READ_ONCE(task->__state) & __TASK_STOPPED) != 0)
-#define task_is_stopped_or_traced(task) ((READ_ONCE(task->__state) & (__TASK_STOPPED | __TASK_TRACED)) != 0)
-
/*
* Special states are those that do not use the normal wait-loop pattern. See
* the comment with set_special_state().
@@ -1082,6 +1078,10 @@ struct task_struct {
/* Restored if set_restore_sigmask() was used: */
sigset_t saved_sigmask;
struct sigpending pending;
+#ifdef CONFIG_PREEMPT_RT
+ /* TODO: move me into ->restart_block ? */
+ struct kernel_siginfo forced_info;
+#endif
unsigned long sas_ss_sp;
size_t sas_ss_size;
unsigned int sas_ss_flags;
@@ -1727,6 +1727,16 @@ static __always_inline bool is_percpu_thread(void)
#endif
}
+/* Is the current task guaranteed to stay on its current CPU? */
+static inline bool is_migratable(void)
+{
+#ifdef CONFIG_SMP
+ return preemptible() && !current->migration_disabled;
+#else
+ return false;
+#endif
+}
+
/* Per-process atomic flags. */
#define PFA_NO_NEW_PRIVS 0 /* May not gain new privileges. */
#define PFA_SPREAD_PAGE 1 /* Spread page cache over cpuset */
@@ -1999,6 +2009,118 @@ static inline int test_tsk_need_resched(struct task_struct *tsk)
return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED));
}
+#ifdef CONFIG_PREEMPT_LAZY
+static inline void set_tsk_need_resched_lazy(struct task_struct *tsk)
+{
+ set_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY);
+}
+
+static inline void clear_tsk_need_resched_lazy(struct task_struct *tsk)
+{
+ clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY);
+}
+
+static inline int test_tsk_need_resched_lazy(struct task_struct *tsk)
+{
+ return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY));
+}
+
+static inline int need_resched_lazy(void)
+{
+ return test_thread_flag(TIF_NEED_RESCHED_LAZY);
+}
+
+static inline int need_resched_now(void)
+{
+ return test_thread_flag(TIF_NEED_RESCHED);
+}
+
+#else
+static inline void clear_tsk_need_resched_lazy(struct task_struct *tsk) { }
+static inline int need_resched_lazy(void) { return 0; }
+
+static inline int need_resched_now(void)
+{
+ return test_thread_flag(TIF_NEED_RESCHED);
+}
+
+#endif
+
+#ifdef CONFIG_PREEMPT_RT
+static inline bool task_match_saved_state(struct task_struct *p, long match_state)
+{
+ return p->saved_state == match_state;
+}
+
+static inline bool task_is_traced(struct task_struct *task)
+{
+ bool traced = false;
+
+ /* in case the task is sleeping on tasklist_lock */
+ raw_spin_lock_irq(&task->pi_lock);
+ if (READ_ONCE(task->__state) & __TASK_TRACED)
+ traced = true;
+ else if (task->saved_state & __TASK_TRACED)
+ traced = true;
+ raw_spin_unlock_irq(&task->pi_lock);
+ return traced;
+}
+
+static inline bool task_is_stopped_or_traced(struct task_struct *task)
+{
+ bool traced_stopped = false;
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&task->pi_lock, flags);
+
+ if (READ_ONCE(task->__state) & (__TASK_STOPPED | __TASK_TRACED))
+ traced_stopped = true;
+ else if (task->saved_state & (__TASK_STOPPED | __TASK_TRACED))
+ traced_stopped = true;
+
+ raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+ return traced_stopped;
+}
+
+#else
+
+static inline bool task_match_saved_state(struct task_struct *p, long match_state)
+{
+ return false;
+}
+
+static inline bool task_is_traced(struct task_struct *task)
+{
+ return READ_ONCE(task->__state) & __TASK_TRACED;
+}
+
+static inline bool task_is_stopped_or_traced(struct task_struct *task)
+{
+ return READ_ONCE(task->__state) & (__TASK_STOPPED | __TASK_TRACED);
+}
+#endif
+
+static inline bool task_match_state_or_saved(struct task_struct *p,
+ long match_state)
+{
+ if (READ_ONCE(p->__state) == match_state)
+ return true;
+
+ return task_match_saved_state(p, match_state);
+}
+
+static inline bool task_match_state_lock(struct task_struct *p,
+ long match_state)
+{
+ bool match;
+
+ raw_spin_lock_irq(&p->pi_lock);
+ match = task_match_state_or_saved(p, match_state);
+ raw_spin_unlock_irq(&p->pi_lock);
+
+ return match;
+}
+
/*
* cond_resched() and cond_resched_lock(): latency reduction via
* explicit rescheduling in places that are safe. The return
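To illustrate what the new state helpers above are for (a sketch, not
code from the patch): on PREEMPT_RT a task blocking on a sleeping
spinlock parks its real state in ->saved_state, so a check that only
looks at ->__state can miss e.g. a traced task. A caller needing the
combined view would use the locked helper:

  /* Hypothetical wait loop; the real users live in the scheduler core. */
  static void example_wait_for_state(struct task_struct *p, long state)
  {
          while (!task_match_state_lock(p, state))
                  cpu_relax();
  }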
diff --git a/include/linux/sched/task_stack.h b/include/linux/sched/task_stack.h
index d10150587d819..ccd1336aa7f42 100644
--- a/include/linux/sched/task_stack.h
+++ b/include/linux/sched/task_stack.h
@@ -70,6 +70,7 @@ static inline void *try_get_task_stack(struct task_struct *tsk)
}
extern void put_task_stack(struct task_struct *tsk);
+extern void put_task_stack_sched(struct task_struct *tsk);
#else
static inline void *try_get_task_stack(struct task_struct *tsk)
{
@@ -77,8 +78,17 @@ static inline void *try_get_task_stack(struct task_struct *tsk)
}
static inline void put_task_stack(struct task_struct *tsk) {}
+static inline void put_task_stack_sched(struct task_struct *tsk) {}
#endif
+#ifdef CONFIG_ARCH_THREAD_STACK_ALLOCATOR
+static inline void task_stack_cleanup(struct task_struct *tsk) {}
+#else
+extern void task_stack_cleanup(struct task_struct *tsk);
+#endif
+
+void exit_task_stack_account(struct task_struct *tsk);
+
#define task_stack_end_corrupted(task) \
(*(end_of_stack(task)) != STACK_END_MAGIC)
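A sketch of how the new task-stack hooks above are meant to be called
(the function names here are illustrative; the real call sites are in
the kernel/exit.c and kernel/fork.c changes below):

  /* From a context that must not free (e.g. the final switch away): */
  static void example_finish_task_switch(struct task_struct *prev)
  {
          /* Caches the stack or only marks it for delayed freeing. */
          put_task_stack_sched(prev);
  }

  /* Later, from preemptible context: */
  static void example_delayed_put(struct task_struct *tsk)
  {
          task_stack_cleanup(tsk);        /* frees a marked stack */
          put_task_struct(tsk);
  }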
diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h
index 5db211f43b29e..aa011f6687051 100644
--- a/include/linux/serial_8250.h
+++ b/include/linux/serial_8250.h
@@ -7,6 +7,7 @@
#ifndef _LINUX_SERIAL_8250_H
#define _LINUX_SERIAL_8250_H
+#include <linux/atomic.h>
#include <linux/serial_core.h>
#include <linux/serial_reg.h>
#include <linux/platform_device.h>
@@ -125,6 +126,8 @@ struct uart_8250_port {
#define MSR_SAVE_FLAGS UART_MSR_ANY_DELTA
unsigned char msr_saved_flags;
+ atomic_t console_printing;
+
struct uart_8250_dma *dma;
const struct uart_8250_ops *ops;
@@ -180,6 +183,8 @@ void serial8250_init_port(struct uart_8250_port *up);
void serial8250_set_defaults(struct uart_8250_port *up);
void serial8250_console_write(struct uart_8250_port *up, const char *s,
unsigned int count);
+void serial8250_console_write_atomic(struct uart_8250_port *up, const char *s,
+ unsigned int count);
int serial8250_console_setup(struct uart_port *port, char *options, bool probe);
int serial8250_console_exit(struct uart_port *port);
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 686a666d073d5..aa60bc21c063b 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -297,6 +297,7 @@ struct sk_buff_head {
__u32 qlen;
spinlock_t lock;
+ raw_spinlock_t raw_lock;
};
struct sk_buff;
@@ -1953,6 +1954,12 @@ static inline void skb_queue_head_init(struct sk_buff_head *list)
__skb_queue_head_init(list);
}
+static inline void skb_queue_head_init_raw(struct sk_buff_head *list)
+{
+ raw_spin_lock_init(&list->raw_lock);
+ __skb_queue_head_init(list);
+}
+
static inline void skb_queue_head_init_class(struct sk_buff_head *list,
struct lock_class_key *class)
{
diff --git a/include/linux/smp.h b/include/linux/smp.h
index a80ab58ae3f1d..dd3441d8af448 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -267,6 +267,9 @@ static inline int get_boot_cpu_id(void)
#define get_cpu() ({ preempt_disable(); __smp_processor_id(); })
#define put_cpu() preempt_enable()
+#define get_cpu_light() ({ migrate_disable(); __smp_processor_id(); })
+#define put_cpu_light() migrate_enable()
+
/*
* Callback to arch code if there's nosmp or maxcpus=0 on the
* boot command line:
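Usage sketch for the _light accessors added above (illustrative only):
unlike get_cpu(), they pin the task to its CPU via migrate_disable()
but leave it preemptible, which is what PREEMPT_RT wants in sections
that may take sleeping locks:

  static void example(void)
  {
          int cpu = get_cpu_light();      /* migrate_disable() + CPU id */

          /* Still preemptible here; just guaranteed to stay on @cpu. */
          pr_info("running on CPU %d\n", cpu);

          put_cpu_light();                /* migrate_enable() */
  }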
diff --git a/include/linux/spinlock_types_up.h b/include/linux/spinlock_types_up.h
index c09b6407ae1b3..7f86a2016ac5c 100644
--- a/include/linux/spinlock_types_up.h
+++ b/include/linux/spinlock_types_up.h
@@ -1,7 +1,7 @@
#ifndef __LINUX_SPINLOCK_TYPES_UP_H
#define __LINUX_SPINLOCK_TYPES_UP_H
-#ifndef __LINUX_SPINLOCK_TYPES_H
+#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
# error "please don't include this file directly"
#endif
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index 8af13ba60c7e4..79b6933ef8a03 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -550,23 +550,17 @@ static inline void unlock_system_sleep(void) {}
#ifdef CONFIG_PM_SLEEP_DEBUG
extern bool pm_print_times_enabled;
extern bool pm_debug_messages_on;
-extern __printf(2, 3) void __pm_pr_dbg(bool defer, const char *fmt, ...);
+extern __printf(1, 2) void pm_pr_dbg(const char *fmt, ...);
#else
#define pm_print_times_enabled (false)
#define pm_debug_messages_on (false)
#include <linux/printk.h>
-#define __pm_pr_dbg(defer, fmt, ...) \
+#define pm_pr_dbg(fmt, ...) \
no_printk(KERN_DEBUG fmt, ##__VA_ARGS__)
#endif
-#define pm_pr_dbg(fmt, ...) \
- __pm_pr_dbg(false, fmt, ##__VA_ARGS__)
-
-#define pm_deferred_pr_dbg(fmt, ...) \
- __pm_pr_dbg(true, fmt, ##__VA_ARGS__)
-
#ifdef CONFIG_PM_AUTOSLEEP
/* kernel/power/autosleep.c */
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index ad0c4e0410305..3033c8f052985 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -163,7 +163,17 @@ static inline int test_ti_thread_flag(struct thread_info *ti, int flag)
clear_ti_thread_flag(task_thread_info(t), TIF_##fl)
#endif /* !CONFIG_GENERIC_ENTRY */
-#define tif_need_resched() test_thread_flag(TIF_NEED_RESCHED)
+#ifdef CONFIG_PREEMPT_LAZY
+#define tif_need_resched() (test_thread_flag(TIF_NEED_RESCHED) || \
+ test_thread_flag(TIF_NEED_RESCHED_LAZY))
+#define tif_need_resched_now() (test_thread_flag(TIF_NEED_RESCHED))
+#define tif_need_resched_lazy() test_thread_flag(TIF_NEED_RESCHED_LAZY)
+
+#else
+#define tif_need_resched() test_thread_flag(TIF_NEED_RESCHED)
+#define tif_need_resched_now() test_thread_flag(TIF_NEED_RESCHED)
+#define tif_need_resched_lazy() 0
+#endif
#ifndef CONFIG_HAVE_ARCH_WITHIN_STACK_FRAMES
static inline int arch_within_stack_frames(const void * const stack,
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 50453b2876155..b873e7dcd9aa3 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -69,6 +69,7 @@ struct trace_entry {
unsigned char flags;
unsigned char preempt_count;
int pid;
+ unsigned char preempt_lazy_count;
};
#define TRACE_EVENT_TYPE_MAX \
@@ -157,9 +158,10 @@ static inline void tracing_generic_entry_update(struct trace_entry *entry,
unsigned int trace_ctx)
{
entry->preempt_count = trace_ctx & 0xff;
+ entry->preempt_lazy_count = (trace_ctx >> 16) & 0xff;
entry->pid = current->pid;
entry->type = type;
- entry->flags = trace_ctx >> 16;
+ entry->flags = trace_ctx >> 24;
}
unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status);
@@ -172,6 +174,7 @@ enum trace_flag_type {
TRACE_FLAG_SOFTIRQ = 0x10,
TRACE_FLAG_PREEMPT_RESCHED = 0x20,
TRACE_FLAG_NMI = 0x40,
+ TRACE_FLAG_NEED_RESCHED_LAZY = 0x80,
};
#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
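For reference, the repacked trace_ctx layout implied by the
assignments above (bits 8-15 are unchanged and not shown): preempt
count in bits 0-7, the new lazy-preempt count in bits 16-23, and the
trace flags shifted up from bit 16 to bit 24. A decoder would look
like:

  static void example_decode(unsigned int trace_ctx)
  {
          unsigned char preempt_count = trace_ctx & 0xff;
          unsigned char lazy_count    = (trace_ctx >> 16) & 0xff;
          unsigned char flags         = trace_ctx >> 24;

          /* flags now also carries TRACE_FLAG_NEED_RESCHED_LAZY (0x80) */
          pr_info("pc=%u lazy=%u flags=%#x\n",
                  preempt_count, lazy_count, flags);
  }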
diff --git a/include/linux/u64_stats_sync.h b/include/linux/u64_stats_sync.h
index e8ec116c916bf..81dc1f5e181ac 100644
--- a/include/linux/u64_stats_sync.h
+++ b/include/linux/u64_stats_sync.h
@@ -66,7 +66,7 @@
#include <linux/seqlock.h>
struct u64_stats_sync {
-#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+#if BITS_PER_LONG==32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
seqcount_t seq;
#endif
};
@@ -125,7 +125,7 @@ static inline void u64_stats_inc(u64_stats_t *p)
}
#endif
-#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
+#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
#define u64_stats_init(syncp) seqcount_init(&(syncp)->seq)
#else
static inline void u64_stats_init(struct u64_stats_sync *syncp)
@@ -135,15 +135,19 @@ static inline void u64_stats_init(struct u64_stats_sync *syncp)
static inline void u64_stats_update_begin(struct u64_stats_sync *syncp)
{
-#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
+ if (IS_ENABLED(CONFIG_PREEMPT_RT))
+ preempt_disable();
write_seqcount_begin(&syncp->seq);
#endif
}
static inline void u64_stats_update_end(struct u64_stats_sync *syncp)
{
-#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
write_seqcount_end(&syncp->seq);
+ if (IS_ENABLED(CONFIG_PREEMPT_RT))
+ preempt_enable();
#endif
}
@@ -152,8 +156,11 @@ u64_stats_update_begin_irqsave(struct u64_stats_sync *syncp)
{
unsigned long flags = 0;
-#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
- local_irq_save(flags);
+#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
+ if (IS_ENABLED(CONFIG_PREEMPT_RT))
+ preempt_disable();
+ else
+ local_irq_save(flags);
write_seqcount_begin(&syncp->seq);
#endif
return flags;
@@ -163,15 +170,18 @@ static inline void
u64_stats_update_end_irqrestore(struct u64_stats_sync *syncp,
unsigned long flags)
{
-#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
write_seqcount_end(&syncp->seq);
- local_irq_restore(flags);
+ if (IS_ENABLED(CONFIG_PREEMPT_RT))
+ preempt_enable();
+ else
+ local_irq_restore(flags);
#endif
}
static inline unsigned int __u64_stats_fetch_begin(const struct u64_stats_sync *syncp)
{
-#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
return read_seqcount_begin(&syncp->seq);
#else
return 0;
@@ -180,7 +190,7 @@ static inline unsigned int __u64_stats_fetch_begin(const struct u64_stats_sync *
static inline unsigned int u64_stats_fetch_begin(const struct u64_stats_sync *syncp)
{
-#if BITS_PER_LONG==32 && !defined(CONFIG_SMP)
+#if BITS_PER_LONG == 32 && (!defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT_RT))
preempt_disable();
#endif
return __u64_stats_fetch_begin(syncp);
@@ -189,7 +199,7 @@ static inline unsigned int u64_stats_fetch_begin(const struct u64_stats_sync *sy
static inline bool __u64_stats_fetch_retry(const struct u64_stats_sync *syncp,
unsigned int start)
{
-#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
return read_seqcount_retry(&syncp->seq, start);
#else
return false;
@@ -199,7 +209,7 @@ static inline bool __u64_stats_fetch_retry(const struct u64_stats_sync *syncp,
static inline bool u64_stats_fetch_retry(const struct u64_stats_sync *syncp,
unsigned int start)
{
-#if BITS_PER_LONG==32 && !defined(CONFIG_SMP)
+#if BITS_PER_LONG == 32 && (!defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT_RT))
preempt_enable();
#endif
return __u64_stats_fetch_retry(syncp, start);
@@ -213,7 +223,9 @@ static inline bool u64_stats_fetch_retry(const struct u64_stats_sync *syncp,
*/
static inline unsigned int u64_stats_fetch_begin_irq(const struct u64_stats_sync *syncp)
{
-#if BITS_PER_LONG==32 && !defined(CONFIG_SMP)
+#if BITS_PER_LONG == 32 && defined(CONFIG_PREEMPT_RT)
+ preempt_disable();
+#elif BITS_PER_LONG == 32 && !defined(CONFIG_SMP)
local_irq_disable();
#endif
return __u64_stats_fetch_begin(syncp);
@@ -222,7 +234,9 @@ static inline unsigned int u64_stats_fetch_begin_irq(const struct u64_stats_sync
static inline bool u64_stats_fetch_retry_irq(const struct u64_stats_sync *syncp,
unsigned int start)
{
-#if BITS_PER_LONG==32 && !defined(CONFIG_SMP)
+#if BITS_PER_LONG == 32 && defined(CONFIG_PREEMPT_RT)
+ preempt_enable();
+#elif BITS_PER_LONG == 32 && !defined(CONFIG_SMP)
local_irq_enable();
#endif
return __u64_stats_fetch_retry(syncp, start);
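For context, the standard u64_stats pattern (this is the existing
idiom, not new code): on 32-bit PREEMPT_RT the begin/end pairs above
now disable preemption instead of relying on IRQ disabling or
SMP-only seqcounts.

  struct example_stats {
          u64_stats_t             bytes;
          struct u64_stats_sync   syncp;
  };

  static void example_update(struct example_stats *s, u64 n)
  {
          u64_stats_update_begin(&s->syncp);
          u64_stats_add(&s->bytes, n);
          u64_stats_update_end(&s->syncp);
  }

  static u64 example_read(struct example_stats *s)
  {
          unsigned int start;
          u64 val;

          do {
                  start = u64_stats_fetch_begin(&s->syncp);
                  val = u64_stats_read(&s->bytes);
          } while (u64_stats_fetch_retry(&s->syncp, start));

          return val;
  }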
diff --git a/init/Kconfig b/init/Kconfig
index 036b750e8d8a8..219831245b5a8 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -943,6 +943,7 @@ config PAGE_COUNTER
config MEMCG
bool "Memory controller"
+ depends on !PREEMPT_RT
select PAGE_COUNTER
select EVENTFD
help
diff --git a/init/main.c b/init/main.c
index bb984ed79de0e..eb30d1f729e92 100644
--- a/init/main.c
+++ b/init/main.c
@@ -1597,6 +1597,7 @@ static noinline void __init kernel_init_freeable(void)
rcu_init_tasks_generic();
do_pre_smp_initcalls();
+ rcu_tasks_initiate_self_tests();
lockup_detector_init();
smp_init();
diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt
index ce77f02656603..5d3e650cdf489 100644
--- a/kernel/Kconfig.preempt
+++ b/kernel/Kconfig.preempt
@@ -1,5 +1,11 @@
# SPDX-License-Identifier: GPL-2.0-only
+config HAVE_PREEMPT_LAZY
+ bool
+
+config PREEMPT_LAZY
+ def_bool y if HAVE_PREEMPT_LAZY && PREEMPT_RT
+
config PREEMPT_NONE_BUILD
bool
diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c
index 1486768f23185..bb3b805436c49 100644
--- a/kernel/cgroup/rstat.c
+++ b/kernel/cgroup/rstat.c
@@ -156,8 +156,9 @@ static void cgroup_rstat_flush_locked(struct cgroup *cgrp, bool may_sleep)
raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_rstat_cpu_lock,
cpu);
struct cgroup *pos = NULL;
+ unsigned long flags;
- raw_spin_lock(cpu_lock);
+ raw_spin_lock_irqsave(cpu_lock, flags);
while ((pos = cgroup_rstat_cpu_pop_updated(pos, cgrp, cpu))) {
struct cgroup_subsys_state *css;
@@ -169,7 +170,7 @@ static void cgroup_rstat_flush_locked(struct cgroup *cgrp, bool may_sleep)
css->ss->css_rstat_flush(css, cpu);
rcu_read_unlock();
}
- raw_spin_unlock(cpu_lock);
+ raw_spin_unlock_irqrestore(cpu_lock, flags);
/* if @may_sleep, play nice and yield if necessary */
if (may_sleep && (need_resched() ||
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index da06a5553835b..3e39636da8427 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -238,35 +238,42 @@ NOKPROBE_SYMBOL(kgdb_call_nmi_hook);
static DEFINE_PER_CPU(call_single_data_t, kgdb_roundup_csd) =
CSD_INIT(kgdb_call_nmi_hook, NULL);
-void __weak kgdb_roundup_cpus(void)
+void __weak kgdb_roundup_cpu(unsigned int cpu)
{
call_single_data_t *csd;
+ int ret;
+
+ csd = &per_cpu(kgdb_roundup_csd, cpu);
+
+ /*
+ * If it didn't round up last time, don't try again
+ * since smp_call_function_single_async() will block.
+ *
+ * If rounding_up is false then we know that the
+ * previous call must have at least started and that
+ * means smp_call_function_single_async() won't block.
+ */
+ if (kgdb_info[cpu].rounding_up)
+ return;
+ kgdb_info[cpu].rounding_up = true;
+
+ ret = smp_call_function_single_async(cpu, csd);
+ if (ret)
+ kgdb_info[cpu].rounding_up = false;
+}
+NOKPROBE_SYMBOL(kgdb_roundup_cpu);
+
+void __weak kgdb_roundup_cpus(void)
+{
int this_cpu = raw_smp_processor_id();
int cpu;
- int ret;
for_each_online_cpu(cpu) {
/* No need to roundup ourselves */
if (cpu == this_cpu)
continue;
- csd = &per_cpu(kgdb_roundup_csd, cpu);
-
- /*
- * If it didn't round up last time, don't try again
- * since smp_call_function_single_async() will block.
- *
- * If rounding_up is false then we know that the
- * previous call must have at least started and that
- * means smp_call_function_single_async() won't block.
- */
- if (kgdb_info[cpu].rounding_up)
- continue;
- kgdb_info[cpu].rounding_up = true;
-
- ret = smp_call_function_single_async(cpu, csd);
- if (ret)
- kgdb_info[cpu].rounding_up = false;
+ kgdb_roundup_cpu(cpu);
}
}
NOKPROBE_SYMBOL(kgdb_roundup_cpus);
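The refactoring above splits the per-CPU rounding into its own weak
helper, declared alongside in this series. A hypothetical arch
override could then keep the generic fallback
(example_arch_send_debug_ipi() is a made-up name):

  void kgdb_roundup_cpus(void)
  {
          int this_cpu = raw_smp_processor_id();
          int cpu;

          for_each_online_cpu(cpu) {
                  if (cpu == this_cpu)
                          continue;
                  /* Prefer an NMI-like IPI, fall back to the CSD path. */
                  if (!example_arch_send_debug_ipi(cpu))
                          kgdb_roundup_cpu(cpu);
          }
  }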
diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c
index 6735ac36b7187..539a2f0dc89d9 100644
--- a/kernel/debug/kdb/kdb_io.c
+++ b/kernel/debug/kdb/kdb_io.c
@@ -559,23 +559,17 @@ static void kdb_msg_write(const char *msg, int msg_len)
cp++;
}
+ /* mirror output on atomic consoles */
for_each_console(c) {
if (!(c->flags & CON_ENABLED))
continue;
if (c == dbg_io_ops->cons)
continue;
- /*
- * Set oops_in_progress to encourage the console drivers to
- * disregard their internal spin locks: in the current calling
- * context the risk of deadlock is a bigger problem than risks
- * due to re-entering the console driver. We operate directly on
- * oops_in_progress rather than using bust_spinlocks() because
- * the calls bust_spinlocks() makes on exit are not appropriate
- * for this calling context.
- */
- ++oops_in_progress;
- c->write(c, msg, msg_len);
- --oops_in_progress;
+
+ if (!c->write_atomic)
+ continue;
+ c->write_atomic(c, msg, msg_len);
+
touch_nmi_watchdog();
}
}
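kdb now mirrors output only to consoles implementing the atomic write
callback introduced by the RT printk rework. A driver-side sketch
(names made up; the serial_8250 changes in this series are a real
implementation):

  static void example_write_atomic(struct console *con, const char *s,
                                   unsigned int count)
  {
          /* Must be callable from any context, including NMI. */
          while (count--)
                  example_hw_put_char(*s++);      /* made-up helper */
  }

  static struct console example_console = {
          .name           = "exmpl",
          .write_atomic   = example_write_atomic,
          .flags          = CON_PRINTBUFFER,
          .index          = -1,
  };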
diff --git a/kernel/entry/common.c b/kernel/entry/common.c
index d5a61d565ad5d..a9579f8bf4f04 100644
--- a/kernel/entry/common.c
+++ b/kernel/entry/common.c
@@ -159,9 +159,17 @@ static unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
local_irq_enable_exit_to_user(ti_work);
- if (ti_work & _TIF_NEED_RESCHED)
+ if (ti_work & _TIF_NEED_RESCHED_MASK)
schedule();
+#ifdef ARCH_RT_DELAYS_SIGNAL_SEND
+ if (unlikely(current->forced_info.si_signo)) {
+ struct task_struct *t = current;
+ force_sig_info(&t->forced_info);
+ t->forced_info.si_signo = 0;
+ }
+#endif
+
if (ti_work & _TIF_UPROBE)
uprobe_notify_resume(regs);
@@ -387,7 +395,7 @@ void irqentry_exit_cond_resched(void)
rcu_irq_exit_check_preempt();
if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
WARN_ON_ONCE(!on_thread_stack());
- if (need_resched())
+ if (should_resched(0))
preempt_schedule_irq();
}
}
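The _TIF_NEED_RESCHED_MASK used above comes from the arch header parts
of this series; roughly (a sketch, the exact definition lives in the
per-arch thread_info.h):

  #ifdef CONFIG_PREEMPT_LAZY
  # define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)
  #else
  # define _TIF_NEED_RESCHED_MASK _TIF_NEED_RESCHED
  #endif

That way the exit-to-user path schedules for both the immediate and
the lazy resched request, while the in-kernel path above consults
should_resched(0) so a lazy request alone does not preempt kernel
code.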
diff --git a/kernel/exit.c b/kernel/exit.c
index f702a6a63686e..383a56795e82a 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -171,6 +171,7 @@ static void delayed_put_task_struct(struct rcu_head *rhp)
kprobe_flush_task(tsk);
perf_event_delayed_put(tsk);
trace_sched_process_free(tsk);
+ task_stack_cleanup(tsk);
put_task_struct(tsk);
}
@@ -871,6 +872,7 @@ void __noreturn do_exit(long code)
put_page(tsk->task_frag.page);
validate_creds_for_do_exit(tsk);
+ exit_task_stack_account(tsk);
check_stack_usage();
preempt_disable();
diff --git a/kernel/fork.c b/kernel/fork.c
index 3244cc56b697d..15286e469cf2e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -178,13 +178,23 @@ static inline void free_task_struct(struct task_struct *tsk)
#ifndef CONFIG_ARCH_THREAD_STACK_ALLOCATOR
+#define THREAD_STACK_DELAYED_FREE 1UL
+
+static void thread_stack_mark_delayed_free(struct task_struct *tsk)
+{
+ unsigned long val = (unsigned long)tsk->stack;
+
+ val |= THREAD_STACK_DELAYED_FREE;
+ WRITE_ONCE(tsk->stack, (void *)val);
+}
+
/*
* Allocate pages if THREAD_SIZE is >= PAGE_SIZE, otherwise use a
* kmemcache based allocator.
*/
# if THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK)
-#ifdef CONFIG_VMAP_STACK
+# ifdef CONFIG_VMAP_STACK
/*
* vmalloc() is a bit slow, and calling vfree() enough times will force a TLB
* flush. Try to minimize the number of calls by caching stacks.
@@ -209,11 +219,35 @@ static int free_vm_stack_cache(unsigned int cpu)
return 0;
}
-#endif
-static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
+static int memcg_charge_kernel_stack(struct vm_struct *vm)
{
-#ifdef CONFIG_VMAP_STACK
+ int i;
+ int ret;
+
+ BUILD_BUG_ON(IS_ENABLED(CONFIG_VMAP_STACK) && PAGE_SIZE % 1024 != 0);
+ BUG_ON(vm->nr_pages != THREAD_SIZE / PAGE_SIZE);
+
+ for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) {
+ ret = memcg_kmem_charge_page(vm->pages[i], GFP_KERNEL, 0);
+ if (ret)
+ goto err;
+ }
+ return 0;
+err:
+ /*
+ * If memcg_kmem_charge_page() fails, page's memory cgroup pointer is
+ * NULL, and memcg_kmem_uncharge_page() in free_thread_stack() will
+ * ignore this page.
+ */
+ for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++)
+ memcg_kmem_uncharge_page(vm->pages[i], 0);
+ return ret;
+}
+
+static int alloc_thread_stack_node(struct task_struct *tsk, int node)
+{
+ struct vm_struct *vm;
void *stack;
int i;
@@ -231,9 +265,14 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
/* Clear stale pointers from reused stack. */
memset(s->addr, 0, THREAD_SIZE);
+ if (memcg_charge_kernel_stack(s)) {
+ vfree(s->addr);
+ return -ENOMEM;
+ }
+
tsk->stack_vm_area = s;
tsk->stack = s->addr;
- return s->addr;
+ return 0;
}
/*
@@ -246,71 +285,93 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
THREADINFO_GFP & ~__GFP_ACCOUNT,
PAGE_KERNEL,
0, node, __builtin_return_address(0));
+ if (!stack)
+ return -ENOMEM;
+ vm = find_vm_area(stack);
+ if (memcg_charge_kernel_stack(vm)) {
+ vfree(stack);
+ return -ENOMEM;
+ }
/*
* We can't call find_vm_area() in interrupt context, and
* free_thread_stack() can be called in interrupt context,
* so cache the vm_struct.
*/
- if (stack) {
- tsk->stack_vm_area = find_vm_area(stack);
- tsk->stack = stack;
+ tsk->stack_vm_area = vm;
+ tsk->stack = stack;
+ return 0;
+}
+
+static void free_thread_stack(struct task_struct *tsk, bool cache_only)
+{
+ int i;
+
+ for (i = 0; i < NR_CACHED_STACKS; i++) {
+ if (this_cpu_cmpxchg(cached_stacks[i], NULL,
+ tsk->stack_vm_area) != NULL)
+ continue;
+
+ tsk->stack = NULL;
+ tsk->stack_vm_area = NULL;
+ return;
}
- return stack;
-#else
+ if (cache_only) {
+ thread_stack_mark_delayed_free(tsk);
+ return;
+ }
+
+ vfree(tsk->stack);
+ tsk->stack = NULL;
+ tsk->stack_vm_area = NULL;
+}
+
+# else /* !CONFIG_VMAP_STACK */
+
+static int alloc_thread_stack_node(struct task_struct *tsk, int node)
+{
struct page *page = alloc_pages_node(node, THREADINFO_GFP,
THREAD_SIZE_ORDER);
if (likely(page)) {
tsk->stack = kasan_reset_tag(page_address(page));
- return tsk->stack;
+ return 0;
}
- return NULL;
-#endif
+ return -ENOMEM;
}
-static inline void free_thread_stack(struct task_struct *tsk)
+static void free_thread_stack(struct task_struct *tsk, bool cache_only)
{
-#ifdef CONFIG_VMAP_STACK
- struct vm_struct *vm = task_stack_vm_area(tsk);
-
- if (vm) {
- int i;
-
- for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++)
- memcg_kmem_uncharge_page(vm->pages[i], 0);
-
- for (i = 0; i < NR_CACHED_STACKS; i++) {
- if (this_cpu_cmpxchg(cached_stacks[i],
- NULL, tsk->stack_vm_area) != NULL)
- continue;
-
- return;
- }
-
- vfree_atomic(tsk->stack);
+ if (cache_only) {
+ thread_stack_mark_delayed_free(tsk);
return;
}
-#endif
-
__free_pages(virt_to_page(tsk->stack), THREAD_SIZE_ORDER);
+ tsk->stack = NULL;
}
-# else
+
+# endif /* CONFIG_VMAP_STACK */
+# else /* !(THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK)) */
+
static struct kmem_cache *thread_stack_cache;
-static unsigned long *alloc_thread_stack_node(struct task_struct *tsk,
- int node)
+static int alloc_thread_stack_node(struct task_struct *tsk, int node)
{
unsigned long *stack;
stack = kmem_cache_alloc_node(thread_stack_cache, THREADINFO_GFP, node);
stack = kasan_reset_tag(stack);
tsk->stack = stack;
- return stack;
+ return stack ? 0 : -ENOMEM;
}
-static void free_thread_stack(struct task_struct *tsk)
+static void free_thread_stack(struct task_struct *tsk, bool cache_only)
{
+ if (cache_only) {
+ thread_stack_mark_delayed_free(tsk);
+ return;
+ }
kmem_cache_free(thread_stack_cache, tsk->stack);
+ tsk->stack = NULL;
}
void thread_stack_cache_init(void)
@@ -320,8 +381,36 @@ void thread_stack_cache_init(void)
THREAD_SIZE, NULL);
BUG_ON(thread_stack_cache == NULL);
}
-# endif
-#endif
+
+# endif /* THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK) */
+
+void task_stack_cleanup(struct task_struct *tsk)
+{
+ unsigned long val = (unsigned long)tsk->stack;
+
+ if (!(val & THREAD_STACK_DELAYED_FREE))
+ return;
+
+ WRITE_ONCE(tsk->stack, (void *)(val & ~THREAD_STACK_DELAYED_FREE));
+ free_thread_stack(tsk, false);
+}
+
+#else /* CONFIG_ARCH_THREAD_STACK_ALLOCATOR */
+static int alloc_thread_stack_node(struct task_struct *tsk, int node)
+{
+ unsigned long *stack;
+
+ stack = arch_alloc_thread_stack_node(tsk, node);
+ tsk->stack = stack;
+ return stack ? 0 : -ENOMEM;
+}
+
+static void free_thread_stack(struct task_struct *tsk, bool cache_only)
+{
+ arch_free_thread_stack(tsk);
+}
+
+#endif /* !CONFIG_ARCH_THREAD_STACK_ALLOCATOR */
/* SLAB cache for signal_struct structures (tsk->signal) */
static struct kmem_cache *signal_cachep;
@@ -376,70 +465,55 @@ void vm_area_free(struct vm_area_struct *vma)
static void account_kernel_stack(struct task_struct *tsk, int account)
{
- void *stack = task_stack_page(tsk);
- struct vm_struct *vm = task_stack_vm_area(tsk);
-
- if (vm) {
+ if (IS_ENABLED(CONFIG_VMAP_STACK)) {
+ struct vm_struct *vm = task_stack_vm_area(tsk);
int i;
for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++)
mod_lruvec_page_state(vm->pages[i], NR_KERNEL_STACK_KB,
account * (PAGE_SIZE / 1024));
} else {
+ void *stack = task_stack_page(tsk);
+
/* All stack pages are in the same node. */
mod_lruvec_kmem_state(stack, NR_KERNEL_STACK_KB,
account * (THREAD_SIZE / 1024));
}
}
-static int memcg_charge_kernel_stack(struct task_struct *tsk)
+void exit_task_stack_account(struct task_struct *tsk)
{
-#ifdef CONFIG_VMAP_STACK
- struct vm_struct *vm = task_stack_vm_area(tsk);
- int ret;
+ account_kernel_stack(tsk, -1);
- BUILD_BUG_ON(IS_ENABLED(CONFIG_VMAP_STACK) && PAGE_SIZE % 1024 != 0);
-
- if (vm) {
+ if (IS_ENABLED(CONFIG_VMAP_STACK)) {
+ struct vm_struct *vm;
int i;
- BUG_ON(vm->nr_pages != THREAD_SIZE / PAGE_SIZE);
-
- for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) {
- /*
- * If memcg_kmem_charge_page() fails, page's
- * memory cgroup pointer is NULL, and
- * memcg_kmem_uncharge_page() in free_thread_stack()
- * will ignore this page.
- */
- ret = memcg_kmem_charge_page(vm->pages[i], GFP_KERNEL,
- 0);
- if (ret)
- return ret;
- }
+ vm = task_stack_vm_area(tsk);
+ for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++)
+ memcg_kmem_uncharge_page(vm->pages[i], 0);
}
-#endif
- return 0;
}
-static void release_task_stack(struct task_struct *tsk)
+static void release_task_stack(struct task_struct *tsk, bool cache_only)
{
if (WARN_ON(READ_ONCE(tsk->__state) != TASK_DEAD))
return; /* Better to leak the stack than to free prematurely */
- account_kernel_stack(tsk, -1);
- free_thread_stack(tsk);
- tsk->stack = NULL;
-#ifdef CONFIG_VMAP_STACK
- tsk->stack_vm_area = NULL;
-#endif
+ free_thread_stack(tsk, cache_only);
}
#ifdef CONFIG_THREAD_INFO_IN_TASK
void put_task_stack(struct task_struct *tsk)
{
if (refcount_dec_and_test(&tsk->stack_refcount))
- release_task_stack(tsk);
+ release_task_stack(tsk, false);
+}
+
+void put_task_stack_sched(struct task_struct *tsk)
+{
+ if (refcount_dec_and_test(&tsk->stack_refcount))
+ release_task_stack(tsk, true);
}
#endif
@@ -453,7 +527,7 @@ void free_task(struct task_struct *tsk)
* The task is finally done with both the stack and thread_info,
* so free both.
*/
- release_task_stack(tsk);
+ release_task_stack(tsk, false);
#else
/*
* If the task had a separate stack allocation, it should be gone
@@ -873,8 +947,6 @@ void set_task_stack_end_magic(struct task_struct *tsk)
static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
{
struct task_struct *tsk;
- unsigned long *stack;
- struct vm_struct *stack_vm_area __maybe_unused;
int err;
if (node == NUMA_NO_NODE)
@@ -883,32 +955,18 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
if (!tsk)
return NULL;
- stack = alloc_thread_stack_node(tsk, node);
- if (!stack)
+ err = arch_dup_task_struct(tsk, orig);
+ if (err)
goto free_tsk;
- if (memcg_charge_kernel_stack(tsk))
- goto free_stack;
+ err = alloc_thread_stack_node(tsk, node);
+ if (err)
+ goto free_tsk;
- stack_vm_area = task_stack_vm_area(tsk);
-
- err = arch_dup_task_struct(tsk, orig);
-
- /*
- * arch_dup_task_struct() clobbers the stack-related fields. Make
- * sure they're properly initialized before using any stack-related
- * functions again.
- */
- tsk->stack = stack;
-#ifdef CONFIG_VMAP_STACK
- tsk->stack_vm_area = stack_vm_area;
-#endif
#ifdef CONFIG_THREAD_INFO_IN_TASK
refcount_set(&tsk->stack_refcount, 1);
#endif
-
- if (err)
- goto free_stack;
+ account_kernel_stack(tsk, 1);
err = scs_prepare(tsk, node);
if (err)
@@ -952,8 +1010,6 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
tsk->wake_q.next = NULL;
tsk->pf_io_worker = NULL;
- account_kernel_stack(tsk, 1);
-
kcov_task_init(tsk);
kmap_local_fork(tsk);
@@ -972,7 +1028,8 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
return tsk;
free_stack:
- free_thread_stack(tsk);
+ exit_task_stack_account(tsk);
+ free_thread_stack(tsk, false);
free_tsk:
free_task_struct(tsk);
return NULL;
@@ -2468,6 +2525,7 @@ static __latent_entropy struct task_struct *copy_process(
exit_creds(p);
bad_fork_free:
WRITE_ONCE(p->__state, TASK_DEAD);
+ exit_task_stack_account(p);
put_task_stack(p);
delayed_free_task(p);
fork_out:
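One detail of the fork.c rework worth spelling out: the delayed-free
mark is carried in bit 0 of ->stack itself. Thread stacks are aligned
far beyond byte granularity in all three allocators, so the low bit of
a real stack address is always clear and can serve as the flag without
extra storage:

  /* Illustration of the tagging, mirroring the helpers above. */
  static bool example_stack_is_marked(struct task_struct *tsk)
  {
          return (unsigned long)tsk->stack & THREAD_STACK_DELAYED_FREE;
  }

  static void *example_real_stack(struct task_struct *tsk)
  {
          return (void *)((unsigned long)tsk->stack &
                          ~THREAD_STACK_DELAYED_FREE);
  }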
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 27182003b879c..a86a503b2a11a 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -192,12 +192,18 @@ irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc, unsigned int *flags
irqreturn_t handle_irq_event_percpu(struct irq_desc *desc)
{
- irqreturn_t retval;
+ struct pt_regs *regs = get_irq_regs();
+ u64 ip = regs ? instruction_pointer(regs) : 0;
unsigned int flags = 0;
+ irqreturn_t retval;
retval = __handle_irq_event_percpu(desc, &flags);
- add_interrupt_randomness(desc->irq_data.irq, flags);
+#ifdef CONFIG_PREEMPT_RT
+ desc->random_ip = ip;
+#else
+ add_interrupt_randomness(desc->irq_data.irq, flags, ip);
+#endif
if (!irq_settings_no_debug(desc))
note_interrupt(desc, retval);
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 7405e384e5ed0..894e4db1fffcc 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -1281,6 +1281,12 @@ static int irq_thread(void *data)
if (action_ret == IRQ_WAKE_THREAD)
irq_wake_secondary(desc, action);
+ if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
+ migrate_disable();
+ add_interrupt_randomness(action->irq, 0,
+ desc->random_ip ^ (unsigned long) action);
+ migrate_enable();
+ }
wake_threads_waitq(desc);
}
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
index 35859da8bd4f7..dfff31ed644a6 100644
--- a/kernel/ksysfs.c
+++ b/kernel/ksysfs.c
@@ -138,6 +138,15 @@ KERNEL_ATTR_RO(vmcoreinfo);
#endif /* CONFIG_CRASH_CORE */
+#if defined(CONFIG_PREEMPT_RT)
+static ssize_t realtime_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%d\n", 1);
+}
+KERNEL_ATTR_RO(realtime);
+#endif
+
/* whether file capabilities are enabled */
static ssize_t fscaps_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
@@ -228,6 +237,9 @@ static struct attribute * kernel_attrs[] = {
#ifndef CONFIG_TINY_RCU
&rcu_expedited_attr.attr,
&rcu_normal_attr.attr,
+#endif
+#ifdef CONFIG_PREEMPT_RT
+ &realtime_attr.attr,
#endif
NULL
};
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 2270ec68f10a1..4a882f83aeb9d 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -5485,6 +5485,7 @@ static noinstr void check_flags(unsigned long flags)
}
}
+#ifndef CONFIG_PREEMPT_RT
/*
* We dont accurately track softirq state in e.g.
* hardirq contexts (such as on 4KSTACKS), so only
@@ -5499,6 +5500,7 @@ static noinstr void check_flags(unsigned long flags)
DEBUG_LOCKS_WARN_ON(!current->softirqs_enabled);
}
}
+#endif
if (!debug_locks)
print_irqtrace_events(current);
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 0c6a48dfcecb3..5e19fe7dda4c5 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -1104,8 +1104,26 @@ static int __sched task_blocks_on_rt_mutex(struct rt_mutex_base *lock,
* which is wrong, as the other waiter is not in a deadlock
* situation.
*/
- if (owner == task)
+ if (owner == task) {
+#if defined(DEBUG_WW_MUTEXES) && defined(CONFIG_DEBUG_LOCKING_API_SELFTESTS)
+ /*
+ * The lockdep selftest for ww-mutex assumes in a few cases
+ * that ww_ctx->contending_lock is assigned via
+ * __ww_mutex_check_kill(), which does not happen if the
+ * rtmutex detects the deadlock early.
+ */
+ if (build_ww_mutex() && ww_ctx) {
+ struct rt_mutex *rtm;
+
+ /* Check whether the waiter should backout immediately */
+ rtm = container_of(lock, struct rt_mutex, rtmutex);
+
+ __ww_mutex_add_waiter(waiter, rtm, ww_ctx);
+ __ww_mutex_check_kill(rtm, waiter, ww_ctx);
+ }
+#endif
return -EDEADLK;
+ }
raw_spin_lock(&task->pi_lock);
waiter->task = task;
diff --git a/kernel/locking/rtmutex_api.c b/kernel/locking/rtmutex_api.c
index 5c9299aaabae1..900220941caac 100644
--- a/kernel/locking/rtmutex_api.c
+++ b/kernel/locking/rtmutex_api.c
@@ -21,12 +21,13 @@ int max_lock_depth = 1024;
*/
static __always_inline int __rt_mutex_lock_common(struct rt_mutex *lock,
unsigned int state,
+ struct lockdep_map *nest_lock,
unsigned int subclass)
{
int ret;
might_sleep();
- mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
+ mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, _RET_IP_);
ret = __rt_mutex_lock(&lock->rtmutex, state);
if (ret)
mutex_release(&lock->dep_map, _RET_IP_);
@@ -48,10 +49,16 @@ EXPORT_SYMBOL(rt_mutex_base_init);
*/
void __sched rt_mutex_lock_nested(struct rt_mutex *lock, unsigned int subclass)
{
- __rt_mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, subclass);
+ __rt_mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, NULL, subclass);
}
EXPORT_SYMBOL_GPL(rt_mutex_lock_nested);
+void __sched _rt_mutex_lock_nest_lock(struct rt_mutex *lock, struct lockdep_map *nest_lock)
+{
+ __rt_mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, nest_lock, 0);
+}
+EXPORT_SYMBOL_GPL(_rt_mutex_lock_nest_lock);
+
#else /* !CONFIG_DEBUG_LOCK_ALLOC */
/**
@@ -61,7 +68,7 @@ EXPORT_SYMBOL_GPL(rt_mutex_lock_nested);
*/
void __sched rt_mutex_lock(struct rt_mutex *lock)
{
- __rt_mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0);
+ __rt_mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, NULL, 0);
}
EXPORT_SYMBOL_GPL(rt_mutex_lock);
#endif
@@ -77,10 +84,25 @@ EXPORT_SYMBOL_GPL(rt_mutex_lock);
*/
int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock)
{
- return __rt_mutex_lock_common(lock, TASK_INTERRUPTIBLE, 0);
+ return __rt_mutex_lock_common(lock, TASK_INTERRUPTIBLE, NULL, 0);
}
EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible);
+/**
+ * rt_mutex_lock_killable - lock a rt_mutex killable
+ *
+ * @lock: the rt_mutex to be locked
+ *
+ * Returns:
+ * 0 on success
+ * -EINTR when interrupted by a signal
+ */
+int __sched rt_mutex_lock_killable(struct rt_mutex *lock)
+{
+ return __rt_mutex_lock_common(lock, TASK_KILLABLE, NULL, 0);
+}
+EXPORT_SYMBOL_GPL(rt_mutex_lock_killable);
+
/**
* rt_mutex_trylock - try to lock a rt_mutex
*
diff --git a/kernel/locking/spinlock_rt.c b/kernel/locking/spinlock_rt.c
index b2e553f9255bf..9e396a09fe0fd 100644
--- a/kernel/locking/spinlock_rt.c
+++ b/kernel/locking/spinlock_rt.c
@@ -257,12 +257,6 @@ void __sched rt_write_unlock(rwlock_t *rwlock)
}
EXPORT_SYMBOL(rt_write_unlock);
-int __sched rt_rwlock_is_contended(rwlock_t *rwlock)
-{
- return rw_base_is_contended(&rwlock->rwbase);
-}
-EXPORT_SYMBOL(rt_rwlock_is_contended);
-
#ifdef CONFIG_DEBUG_LOCK_ALLOC
void __rt_rwlock_init(rwlock_t *rwlock, const char *name,
struct lock_class_key *key)
diff --git a/kernel/locking/ww_rt_mutex.c b/kernel/locking/ww_rt_mutex.c
index 0e00205cf467a..d1473c624105c 100644
--- a/kernel/locking/ww_rt_mutex.c
+++ b/kernel/locking/ww_rt_mutex.c
@@ -26,7 +26,7 @@ int ww_mutex_trylock(struct ww_mutex *lock, struct ww_acquire_ctx *ww_ctx)
if (__rt_mutex_trylock(&rtm->rtmutex)) {
ww_mutex_set_context_fastpath(lock, ww_ctx);
- mutex_acquire_nest(&rtm->dep_map, 0, 1, ww_ctx->dep_map, _RET_IP_);
+ mutex_acquire_nest(&rtm->dep_map, 0, 1, &ww_ctx->dep_map, _RET_IP_);
return 1;
}
diff --git a/kernel/panic.c b/kernel/panic.c
index cefd7d82366fb..d509c0694af95 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -178,12 +178,28 @@ static void panic_print_sys_info(void)
void panic(const char *fmt, ...)
{
static char buf[1024];
+ va_list args2;
va_list args;
long i, i_next = 0, len;
int state = 0;
int old_cpu, this_cpu;
bool _crash_kexec_post_notifiers = crash_kexec_post_notifiers;
+ console_verbose();
+ pr_emerg("Kernel panic - not syncing:\n");
+ va_start(args2, fmt);
+ va_copy(args, args2);
+ vprintk(fmt, args2);
+ va_end(args2);
+#ifdef CONFIG_DEBUG_BUGVERBOSE
+ /*
+ * Avoid nested stack-dumping if a panic occurs during oops processing
+ */
+ if (!test_taint(TAINT_DIE) && oops_in_progress <= 1)
+ dump_stack();
+#endif
+ pr_flush(1000, true);
+
/*
* Disable local interrupts. This will prevent panic_smp_self_stop
* from deadlocking the first cpu that invokes the panic, since
@@ -214,24 +230,13 @@ void panic(const char *fmt, ...)
if (old_cpu != PANIC_CPU_INVALID && old_cpu != this_cpu)
panic_smp_self_stop();
- console_verbose();
bust_spinlocks(1);
- va_start(args, fmt);
len = vscnprintf(buf, sizeof(buf), fmt, args);
va_end(args);
if (len && buf[len - 1] == '\n')
buf[len - 1] = '\0';
- pr_emerg("Kernel panic - not syncing: %s\n", buf);
-#ifdef CONFIG_DEBUG_BUGVERBOSE
- /*
- * Avoid nested stack-dumping if a panic occurs during oops processing
- */
- if (!test_taint(TAINT_DIE) && oops_in_progress <= 1)
- dump_stack();
-#endif
-
/*
* If kgdb is enabled, give it a chance to run before we stop all
* the other CPUs or else we won't be able to debug processes left
@@ -540,9 +545,11 @@ static u64 oops_id;
static int init_oops_id(void)
{
+#ifndef CONFIG_PREEMPT_RT
if (!oops_id)
get_random_bytes(&oops_id, sizeof(oops_id));
else
+#endif
oops_id++;
return 0;
@@ -553,6 +560,7 @@ static void print_oops_end_marker(void)
{
init_oops_id();
pr_warn("---[ end trace %016llx ]---\n", (unsigned long long)oops_id);
+ pr_flush(1000, true);
}
/*
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 44169f3081fdc..eaa725ca079cb 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -543,14 +543,13 @@ static int __init pm_debug_messages_setup(char *str)
__setup("pm_debug_messages", pm_debug_messages_setup);
/**
- * __pm_pr_dbg - Print a suspend debug message to the kernel log.
- * @defer: Whether or not to use printk_deferred() to print the message.
+ * pm_pr_dbg - Print a suspend debug message to the kernel log.
* @fmt: Message format.
*
* The message will be emitted if enabled through the pm_debug_messages
* sysfs attribute.
*/
-void __pm_pr_dbg(bool defer, const char *fmt, ...)
+void pm_pr_dbg(const char *fmt, ...)
{
struct va_format vaf;
va_list args;
@@ -563,10 +562,7 @@ void __pm_pr_dbg(bool defer, const char *fmt, ...)
vaf.fmt = fmt;
vaf.va = &args;
- if (defer)
- printk_deferred(KERN_DEBUG "PM: %pV", &vaf);
- else
- printk(KERN_DEBUG "PM: %pV", &vaf);
+ printk(KERN_DEBUG "PM: %pV", &vaf);
va_end(args);
}
diff --git a/kernel/printk/Makefile b/kernel/printk/Makefile
index d118739874c0d..bc6b856a0ff41 100644
--- a/kernel/printk/Makefile
+++ b/kernel/printk/Makefile
@@ -1,6 +1,5 @@
# SPDX-License-Identifier: GPL-2.0-only
obj-y = printk.o
-obj-$(CONFIG_PRINTK) += printk_safe.o
obj-$(CONFIG_A11Y_BRAILLE_CONSOLE) += braille.o
obj-$(CONFIG_PRINTK) += printk_ringbuffer.o
obj-$(CONFIG_PRINTK_INDEX) += index.o
diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h
index 9f3ed2fdb7211..de8ab059dd961 100644
--- a/kernel/printk/internal.h
+++ b/kernel/printk/internal.h
@@ -2,7 +2,6 @@
/*
* internal.h - printk internal definitions
*/
-#include <linux/percpu.h>
#ifdef CONFIG_PRINTK
@@ -12,41 +11,6 @@ enum printk_info_flags {
LOG_CONT = 8, /* text is a fragment of a continuation line */
};
-__printf(4, 0)
-int vprintk_store(int facility, int level,
- const struct dev_printk_info *dev_info,
- const char *fmt, va_list args);
-
-__printf(1, 0) int vprintk_default(const char *fmt, va_list args);
-__printf(1, 0) int vprintk_deferred(const char *fmt, va_list args);
-
-bool printk_percpu_data_ready(void);
-
-#define printk_safe_enter_irqsave(flags) \
- do { \
- local_irq_save(flags); \
- __printk_safe_enter(); \
- } while (0)
-
-#define printk_safe_exit_irqrestore(flags) \
- do { \
- __printk_safe_exit(); \
- local_irq_restore(flags); \
- } while (0)
-
-void defer_console_output(void);
-
u16 printk_parse_prefix(const char *text, int *level,
enum printk_info_flags *flags);
-#else
-
-/*
- * In !PRINTK builds we still export console_sem
- * semaphore and some of console functions (console_unlock()/etc.), so
- * printk-safe must preserve the existing local IRQ guarantees.
- */
-#define printk_safe_enter_irqsave(flags) local_irq_save(flags)
-#define printk_safe_exit_irqrestore(flags) local_irq_restore(flags)
-
-static inline bool printk_percpu_data_ready(void) { return false; }
#endif /* CONFIG_PRINTK */
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 013bfd6dcc34a..0e7a8ac342e39 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -44,6 +44,10 @@
#include <linux/irq_work.h>
#include <linux/ctype.h>
#include <linux/uio.h>
+#include <linux/kdb.h>
+#include <linux/kgdb.h>
+#include <linux/kthread.h>
+#include <linux/clocksource.h>
#include <linux/sched/clock.h>
#include <linux/sched/debug.h>
#include <linux/sched/task_stack.h>
@@ -225,19 +229,7 @@ static int nr_ext_console_drivers;
static int __down_trylock_console_sem(unsigned long ip)
{
- int lock_failed;
- unsigned long flags;
-
- /*
- * Here and in __up_console_sem() we need to be in safe mode,
- * because spindump/WARN/etc from under console ->lock will
- * deadlock in printk()->down_trylock_console_sem() otherwise.
- */
- printk_safe_enter_irqsave(flags);
- lock_failed = down_trylock(&console_sem);
- printk_safe_exit_irqrestore(flags);
-
- if (lock_failed)
+ if (down_trylock(&console_sem))
return 1;
mutex_acquire(&console_lock_dep_map, 0, 1, ip);
return 0;
@@ -246,13 +238,9 @@ static int __down_trylock_console_sem(unsigned long ip)
static void __up_console_sem(unsigned long ip)
{
- unsigned long flags;
-
mutex_release(&console_lock_dep_map, ip);
- printk_safe_enter_irqsave(flags);
up(&console_sem);
- printk_safe_exit_irqrestore(flags);
}
#define up_console_sem() __up_console_sem(_RET_IP_)
@@ -266,11 +254,6 @@ static void __up_console_sem(unsigned long ip)
*/
static int console_locked, console_suspended;
-/*
- * If exclusive_console is non-NULL then only this console is to be printed to.
- */
-static struct console *exclusive_console;
-
/*
* Array of consoles built from command line options (console=)
*/
@@ -350,10 +333,13 @@ static int console_msg_format = MSG_FORMAT_DEFAULT;
* non-prinatable characters are escaped in the "\xff" notation.
*/
+#ifdef CONFIG_PRINTK
/* syslog_lock protects syslog_* variables and write access to clear_seq. */
static DEFINE_MUTEX(syslog_lock);
-#ifdef CONFIG_PRINTK
+/* Set to enable sync mode. Once set, it is never cleared. */
+static bool sync_mode;
+
DECLARE_WAIT_QUEUE_HEAD(log_wait);
/* All 3 protected by @syslog_lock. */
/* the next printk record to read by syslog(READ) or /proc/kmsg */
@@ -361,17 +347,6 @@ static u64 syslog_seq;
static size_t syslog_partial;
static bool syslog_time;
-/* All 3 protected by @console_sem. */
-/* the next printk record to write to the console */
-static u64 console_seq;
-static u64 exclusive_console_stop_seq;
-static unsigned long console_dropped;
-
-struct latched_seq {
- seqcount_latch_t latch;
- u64 val[2];
-};
-
/*
* The next printk record to read after the last 'clear' command. There are
* two copies (updated with seqcount_latch) so that reads can locklessly
@@ -389,9 +364,6 @@ static struct latched_seq clear_seq = {
#define PREFIX_MAX 32
#endif
-/* the maximum size of a formatted record (i.e. with prefix added per line) */
-#define CONSOLE_LOG_MAX 1024
-
/* the maximum size allowed to be reserved for a record */
#define LOG_LINE_MAX (CONSOLE_LOG_MAX - PREFIX_MAX)
@@ -430,12 +402,12 @@ static struct printk_ringbuffer *prb = &printk_rb_static;
*/
static bool __printk_percpu_data_ready __read_mostly;
-bool printk_percpu_data_ready(void)
+static bool printk_percpu_data_ready(void)
{
return __printk_percpu_data_ready;
}
-/* Must be called under syslog_lock. */
+/* Must be called under associated write-protection lock. */
static void latched_seq_write(struct latched_seq *ls, u64 val)
{
raw_write_seqcount_latch(&ls->latch);
@@ -1747,190 +1719,154 @@ SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len)
return do_syslog(type, buf, len, SYSLOG_FROM_READER);
}
-/*
- * Special console_lock variants that help to reduce the risk of soft-lockups.
- * They allow to pass console_lock to another printk() call using a busy wait.
- */
+int printk_delay_msec __read_mostly;
-#ifdef CONFIG_LOCKDEP
-static struct lockdep_map console_owner_dep_map = {
- .name = "console_owner"
-};
-#endif
-
-static DEFINE_RAW_SPINLOCK(console_owner_lock);
-static struct task_struct *console_owner;
-static bool console_waiter;
-
-/**
- * console_lock_spinning_enable - mark beginning of code where another
- * thread might safely busy wait
- *
- * This basically converts console_lock into a spinlock. This marks
- * the section where the console_lock owner can not sleep, because
- * there may be a waiter spinning (like a spinlock). Also it must be
- * ready to hand over the lock at the end of the section.
- */
-static void console_lock_spinning_enable(void)
+static inline void printk_delay(int level)
{
- raw_spin_lock(&console_owner_lock);
- console_owner = current;
- raw_spin_unlock(&console_owner_lock);
+ boot_delay_msec(level);
- /* The waiter may spin on us after setting console_owner */
- spin_acquire(&console_owner_dep_map, 0, 0, _THIS_IP_);
-}
+ if (unlikely(printk_delay_msec)) {
+ int m = printk_delay_msec;
-/**
- * console_lock_spinning_disable_and_check - mark end of code where another
- * thread was able to busy wait and check if there is a waiter
- *
- * This is called at the end of the section where spinning is allowed.
- * It has two functions. First, it is a signal that it is no longer
- * safe to start busy waiting for the lock. Second, it checks if
- * there is a busy waiter and passes the lock rights to her.
- *
- * Important: Callers lose the lock if there was a busy waiter.
- * They must not touch items synchronized by console_lock
- * in this case.
- *
- * Return: 1 if the lock rights were passed, 0 otherwise.
- */
-static int console_lock_spinning_disable_and_check(void)
-{
- int waiter;
-
- raw_spin_lock(&console_owner_lock);
- waiter = READ_ONCE(console_waiter);
- console_owner = NULL;
- raw_spin_unlock(&console_owner_lock);
-
- if (!waiter) {
- spin_release(&console_owner_dep_map, _THIS_IP_);
- return 0;
- }
-
- /* The waiter is now free to continue */
- WRITE_ONCE(console_waiter, false);
-
- spin_release(&console_owner_dep_map, _THIS_IP_);
-
- /*
- * Hand off console_lock to waiter. The waiter will perform
- * the up(). After this, the waiter is the console_lock owner.
- */
- mutex_release(&console_lock_dep_map, _THIS_IP_);
- return 1;
-}
-
-/**
- * console_trylock_spinning - try to get console_lock by busy waiting
- *
- * This allows to busy wait for the console_lock when the current
- * owner is running in specially marked sections. It means that
- * the current owner is running and cannot reschedule until it
- * is ready to lose the lock.
- *
- * Return: 1 if we got the lock, 0 othrewise
- */
-static int console_trylock_spinning(void)
-{
- struct task_struct *owner = NULL;
- bool waiter;
- bool spin = false;
- unsigned long flags;
-
- if (console_trylock())
- return 1;
-
- printk_safe_enter_irqsave(flags);
-
- raw_spin_lock(&console_owner_lock);
- owner = READ_ONCE(console_owner);
- waiter = READ_ONCE(console_waiter);
- if (!waiter && owner && owner != current) {
- WRITE_ONCE(console_waiter, true);
- spin = true;
- }
- raw_spin_unlock(&console_owner_lock);
-
- /*
- * If there is an active printk() writing to the
- * consoles, instead of having it write our data too,
- * see if we can offload that load from the active
- * printer, and do some printing ourselves.
- * Go into a spin only if there isn't already a waiter
- * spinning, and there is an active printer, and
- * that active printer isn't us (recursive printk?).
- */
- if (!spin) {
- printk_safe_exit_irqrestore(flags);
- return 0;
- }
-
- /* We spin waiting for the owner to release us */
- spin_acquire(&console_owner_dep_map, 0, 0, _THIS_IP_);
- /* Owner will clear console_waiter on hand off */
- while (READ_ONCE(console_waiter))
- cpu_relax();
- spin_release(&console_owner_dep_map, _THIS_IP_);
-
- printk_safe_exit_irqrestore(flags);
- /*
- * The owner passed the console lock to us.
- * Since we did not spin on console lock, annotate
- * this as a trylock. Otherwise lockdep will
- * complain.
- */
- mutex_acquire(&console_lock_dep_map, 0, 1, _THIS_IP_);
-
- return 1;
-}
-
-/*
- * Call the console drivers, asking them to write out
- * log_buf[start] to log_buf[end - 1].
- * The console_lock must be held.
- */
-static void call_console_drivers(const char *ext_text, size_t ext_len,
- const char *text, size_t len)
-{
- static char dropped_text[64];
- size_t dropped_len = 0;
- struct console *con;
-
- trace_console_rcuidle(text, len);
-
- if (!console_drivers)
- return;
-
- if (console_dropped) {
- dropped_len = snprintf(dropped_text, sizeof(dropped_text),
- "** %lu printk messages dropped **\n",
- console_dropped);
- console_dropped = 0;
- }
-
- for_each_console(con) {
- if (exclusive_console && con != exclusive_console)
- continue;
- if (!(con->flags & CON_ENABLED))
- continue;
- if (!con->write)
- continue;
- if (!cpu_online(smp_processor_id()) &&
- !(con->flags & CON_ANYTIME))
- continue;
- if (con->flags & CON_EXTENDED)
- con->write(con, ext_text, ext_len);
- else {
- if (dropped_len)
- con->write(con, dropped_text, dropped_len);
- con->write(con, text, len);
+ while (m--) {
+ mdelay(1);
+ touch_nmi_watchdog();
}
}
}
+static bool kernel_sync_mode(void)
+{
+ return (oops_in_progress || sync_mode);
+}
+
+static bool console_may_sync(struct console *con)
+{
+ if (!(con->flags & CON_ENABLED))
+ return false;
+ if (con->write_atomic && kernel_sync_mode())
+ return true;
+ if (con->write_atomic && (con->flags & CON_HANDOVER) && !con->thread)
+ return true;
+ if (con->write && (con->flags & CON_BOOT) && !con->thread)
+ return true;
+ return false;
+}
+
+static bool call_sync_console_driver(struct console *con, const char *text, size_t text_len)
+{
+ if (!(con->flags & CON_ENABLED))
+ return false;
+
+ if (con->write_atomic && kernel_sync_mode()) {
+ con->write_atomic(con, text, text_len);
+ return true;
+ }
+
+ if (con->write_atomic && (con->flags & CON_HANDOVER) && !con->thread) {
+ if (console_trylock()) {
+ con->write_atomic(con, text, text_len);
+ console_unlock();
+ return true;
+ }
+
+ } else if (con->write && (con->flags & CON_BOOT) && !con->thread) {
+ if (console_trylock()) {
+ con->write(con, text, text_len);
+ console_unlock();
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static bool have_atomic_console(void)
+{
+ struct console *con;
+
+ for_each_console(con) {
+ if (!(con->flags & CON_ENABLED))
+ continue;
+ if (con->write_atomic)
+ return true;
+ }
+ return false;
+}
+
+static bool print_sync(struct console *con, u64 *seq)
+{
+ struct printk_info info;
+ struct printk_record r;
+ size_t text_len;
+
+ prb_rec_init_rd(&r, &info, &con->sync_buf[0], sizeof(con->sync_buf));
+
+ if (!prb_read_valid(prb, *seq, &r))
+ return false;
+
+ text_len = record_print_text(&r, console_msg_format & MSG_FORMAT_SYSLOG, printk_time);
+
+ if (!call_sync_console_driver(con, &con->sync_buf[0], text_len))
+ return false;
+
+ *seq = r.info->seq;
+
+ touch_softlockup_watchdog_sync();
+ clocksource_touch_watchdog();
+ rcu_cpu_stall_reset();
+ touch_nmi_watchdog();
+
+ if (text_len)
+ printk_delay(r.info->level);
+
+ return true;
+}
+
+static u64 read_console_seq(struct console *con)
+{
+ u64 seq2;
+ u64 seq;
+
+ seq = latched_seq_read_nolock(&con->printk_seq);
+ seq2 = latched_seq_read_nolock(&con->printk_sync_seq);
+ if (seq2 > seq)
+ seq = seq2;
+#ifdef CONFIG_HAVE_NMI
+ seq2 = latched_seq_read_nolock(&con->printk_sync_nmi_seq);
+ if (seq2 > seq)
+ seq = seq2;
+#endif
+ return seq;
+}
+
+static void print_sync_until(struct console *con, u64 seq, bool is_locked)
+{
+ u64 printk_seq;
+
+ while (!__printk_cpu_trylock())
+ cpu_relax();
+
+ for (;;) {
+ printk_seq = read_console_seq(con);
+ if (printk_seq >= seq)
+ break;
+ if (!print_sync(con, &printk_seq))
+ break;
+
+ if (is_locked)
+ latched_seq_write(&con->printk_seq, printk_seq + 1);
+#ifdef CONFIG_PRINTK_NMI
+ else if (in_nmi())
+ latched_seq_write(&con->printk_sync_nmi_seq, printk_seq + 1);
+#endif
+ else
+ latched_seq_write(&con->printk_sync_seq, printk_seq + 1);
+ }
+
+ __printk_cpu_unlock();
+}
+
/*
* Recursion is tracked separately on each CPU. If NMIs are supported, an
* additional NMI context per CPU is also separately tracked. Until per-CPU
@@ -2001,20 +1937,6 @@ static u8 *__printk_recursion_counter(void)
local_irq_restore(flags); \
} while (0)
-int printk_delay_msec __read_mostly;
-
-static inline void printk_delay(void)
-{
- if (unlikely(printk_delay_msec)) {
- int m = printk_delay_msec;
-
- while (m--) {
- mdelay(1);
- touch_nmi_watchdog();
- }
- }
-}
-
static inline u32 printk_caller_id(void)
{
return in_task() ? task_pid_nr(current) :
@@ -2096,13 +2018,14 @@ static u16 printk_sprint(char *text, u16 size, int facility,
}
__printf(4, 0)
-int vprintk_store(int facility, int level,
- const struct dev_printk_info *dev_info,
- const char *fmt, va_list args)
+static int vprintk_store(int facility, int level,
+ const struct dev_printk_info *dev_info,
+ const char *fmt, va_list args)
{
const u32 caller_id = printk_caller_id();
struct prb_reserved_entry e;
enum printk_info_flags flags = 0;
+ bool final_commit = false;
struct printk_record r;
unsigned long irqflags;
u16 trunc_msg_len = 0;
@@ -2113,6 +2036,7 @@ int vprintk_store(int facility, int level,
u16 text_len;
int ret = 0;
u64 ts_nsec;
+ u64 seq;
/*
* Since the duration of printk() can vary depending on the message
@@ -2151,6 +2075,7 @@ int vprintk_store(int facility, int level,
if (flags & LOG_CONT) {
prb_rec_init_wr(&r, reserve_size);
if (prb_reserve_in_last(&e, prb, &r, caller_id, LOG_LINE_MAX)) {
+ seq = r.info->seq;
text_len = printk_sprint(&r.text_buf[r.info->text_len], reserve_size,
facility, &flags, fmt, args);
r.info->text_len += text_len;
@@ -2158,6 +2083,7 @@ int vprintk_store(int facility, int level,
if (flags & LOG_NEWLINE) {
r.info->flags |= LOG_NEWLINE;
prb_final_commit(&e);
+ final_commit = true;
} else {
prb_commit(&e);
}
@@ -2181,6 +2107,7 @@ int vprintk_store(int facility, int level,
if (!prb_reserve(&e, prb, &r))
goto out;
}
+ seq = r.info->seq;
/* fill message */
text_len = printk_sprint(&r.text_buf[0], reserve_size, facility, &flags, fmt, args);
@@ -2196,13 +2123,25 @@ int vprintk_store(int facility, int level,
memcpy(&r.info->dev_info, dev_info, sizeof(r.info->dev_info));
/* A message without a trailing newline can be continued. */
- if (!(flags & LOG_NEWLINE))
+ if (!(flags & LOG_NEWLINE)) {
prb_commit(&e);
- else
+ } else {
prb_final_commit(&e);
+ final_commit = true;
+ }
ret = text_len + trunc_msg_len;
out:
+ /* only the kernel may perform synchronous printing */
+ if (facility == 0 && final_commit) {
+ struct console *con;
+
+ for_each_console(con) {
+ if (console_may_sync(con))
+ print_sync_until(con, seq + 1, false);
+ }
+ }
+
printk_exit_irqrestore(recursion_ptr, irqflags);
return ret;
}
@@ -2212,50 +2151,43 @@ asmlinkage int vprintk_emit(int facility, int level,
const char *fmt, va_list args)
{
int printed_len;
- bool in_sched = false;
/* Suppress unimportant messages after panic happens */
if (unlikely(suppress_printk))
return 0;
- if (level == LOGLEVEL_SCHED) {
+ if (level == LOGLEVEL_SCHED)
level = LOGLEVEL_DEFAULT;
- in_sched = true;
- }
-
- boot_delay_msec(level);
- printk_delay();
printed_len = vprintk_store(facility, level, dev_info, fmt, args);
- /* If called from the scheduler, we can not call up(). */
- if (!in_sched) {
- /*
- * Disable preemption to avoid being preempted while holding
- * console_sem which would prevent anyone from printing to
- * console
- */
- preempt_disable();
- /*
- * Try to acquire and then immediately release the console
- * semaphore. The release will print out buffers and wake up
- * /dev/kmsg and syslog() users.
- */
- if (console_trylock_spinning())
- console_unlock();
- preempt_enable();
- }
-
wake_up_klogd();
return printed_len;
}
EXPORT_SYMBOL(vprintk_emit);
-int vprintk_default(const char *fmt, va_list args)
+__printf(1, 0)
+static int vprintk_default(const char *fmt, va_list args)
{
return vprintk_emit(0, LOGLEVEL_DEFAULT, NULL, fmt, args);
}
-EXPORT_SYMBOL_GPL(vprintk_default);
+
+__printf(1, 0)
+static int vprintk_func(const char *fmt, va_list args)
+{
+#ifdef CONFIG_KGDB_KDB
+ /* Allow printk() to be passed to kdb, but avoid recursion. */
+ if (unlikely(kdb_trap_printk && kdb_printf_cpu < 0))
+ return vkdb_printf(KDB_MSGSRC_PRINTK, fmt, args);
+#endif
+ return vprintk_default(fmt, args);
+}
+
+asmlinkage int vprintk(const char *fmt, va_list args)
+{
+ return vprintk_func(fmt, args);
+}
+EXPORT_SYMBOL(vprintk);
asmlinkage __visible int _printk(const char *fmt, ...)
{
@@ -2270,37 +2202,162 @@ asmlinkage __visible int _printk(const char *fmt, ...)
}
EXPORT_SYMBOL(_printk);
-#else /* CONFIG_PRINTK */
-
-#define CONSOLE_LOG_MAX 0
-#define printk_time false
-
-#define prb_read_valid(rb, seq, r) false
-#define prb_first_valid_seq(rb) 0
-
-static u64 syslog_seq;
-static u64 console_seq;
-static u64 exclusive_console_stop_seq;
-static unsigned long console_dropped;
-
-static size_t record_print_text(const struct printk_record *r,
- bool syslog, bool time)
+static int printk_kthread_func(void *data)
{
- return 0;
+ struct console *con = data;
+ unsigned long dropped = 0;
+ char *dropped_text = NULL;
+ struct printk_info info;
+ struct printk_record r;
+ char *ext_text = NULL;
+ size_t dropped_len;
+ int ret = -ENOMEM;
+ char *text = NULL;
+ char *write_text;
+ size_t len;
+ int error;
+ u64 seq;
+
+ if (con->flags & CON_EXTENDED) {
+ ext_text = kmalloc(CONSOLE_EXT_LOG_MAX, GFP_KERNEL);
+ if (!ext_text)
+ goto out;
+ }
+ text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL);
+ dropped_text = kmalloc(64, GFP_KERNEL);
+ if (!text || !dropped_text)
+ goto out;
+ if (con->flags & CON_EXTENDED)
+ write_text = ext_text;
+ else
+ write_text = text;
+
+ seq = read_console_seq(con);
+
+ prb_rec_init_rd(&r, &info, text, LOG_LINE_MAX + PREFIX_MAX);
+
+ for (;;) {
+ error = wait_event_interruptible(log_wait,
+ prb_read_valid(prb, seq, &r) || kthread_should_stop());
+
+ if (kthread_should_stop())
+ break;
+
+ if (error)
+ continue;
+
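+ /* The ringbuffer wrapped past records this console never printed. */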
+ if (seq != r.info->seq) {
+ dropped += r.info->seq - seq;
+ seq = r.info->seq;
+ }
+
+ seq++;
+
+ if (!(con->flags & CON_ENABLED))
+ continue;
+
+ if (suppress_message_printing(r.info->level))
+ continue;
+
+ if (con->flags & CON_EXTENDED) {
+ len = info_print_ext_header(ext_text,
+ CONSOLE_EXT_LOG_MAX,
+ r.info);
+ len += msg_print_ext_body(ext_text + len,
+ CONSOLE_EXT_LOG_MAX - len,
+ &r.text_buf[0], r.info->text_len,
+ &r.info->dev_info);
+ } else {
+ len = record_print_text(&r,
+ console_msg_format & MSG_FORMAT_SYSLOG,
+ printk_time);
+ }
+
+ console_lock();
+
+ /*
+ * Even though the printk kthread is always preemptible, it is
+ * still not allowed to call cond_resched() from within
+ * console drivers. The task may become non-preemptible in the
+ * console driver call chain. For example, vt_console_print()
+ * takes a spinlock and then can call into fbcon_redraw(),
+ * which can conditionally invoke cond_resched().
+ */
+ console_may_schedule = 0;
+
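+ /* Sync mode has taken over this console; stop the kthread printer. */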
+ if (kernel_sync_mode() && con->write_atomic) {
+ console_unlock();
+ break;
+ }
+
+ if (!(con->flags & CON_EXTENDED) && dropped) {
+ dropped_len = snprintf(dropped_text, 64,
+ "** %lu printk messages dropped **\n",
+ dropped);
+ dropped = 0;
+
+ con->write(con, dropped_text, dropped_len);
+ printk_delay(r.info->level);
+ }
+
+ con->write(con, write_text, len);
+ if (len)
+ printk_delay(r.info->level);
+
+ latched_seq_write(&con->printk_seq, seq);
+
+ console_unlock();
+ }
+ ret = 0;
+out:
+ kfree(dropped_text);
+ kfree(text);
+ kfree(ext_text);
+ pr_info("%sconsole [%s%d]: printing thread stopped\n",
+ (con->flags & CON_BOOT) ? "boot" : "",
+ con->name, con->index);
+ return ret;
}
-static ssize_t info_print_ext_header(char *buf, size_t size,
- struct printk_info *info)
+
+/* Must be called within console_lock(). */
+static void start_printk_kthread(struct console *con)
{
- return 0;
+ con->thread = kthread_run(printk_kthread_func, con,
+ "pr/%s%d", con->name, con->index);
+ if (IS_ERR(con->thread)) {
+ pr_err("%sconsole [%s%d]: unable to start printing thread\n",
+ (con->flags & CON_BOOT) ? "boot" : "",
+ con->name, con->index);
+ return;
+ }
+ pr_info("%sconsole [%s%d]: printing thread started\n",
+ (con->flags & CON_BOOT) ? "boot" : "",
+ con->name, con->index);
+}
+
+/* protected by console_lock */
+static bool kthreads_started;
+
+/* Must be called within console_lock(). */
+static void console_try_thread(struct console *con)
+{
+ if (kthreads_started) {
+ start_printk_kthread(con);
+ return;
+ }
+
+ /*
+ * The printing threads have not been started yet. If this console
+ * can print synchronously, print all unprinted messages.
+ */
+ if (console_may_sync(con)) {
+ unsigned long flags;
+
+ local_irq_save(flags);
+ print_sync_until(con, prb_next_seq(prb), true);
+ local_irq_restore(flags);
+ }
}
-static ssize_t msg_print_ext_body(char *buf, size_t size,
- char *text, size_t text_len,
- struct dev_printk_info *dev_info) { return 0; }
-static void console_lock_spinning_enable(void) { }
-static int console_lock_spinning_disable_and_check(void) { return 0; }
-static void call_console_drivers(const char *ext_text, size_t ext_len,
- const char *text, size_t len) {}
-static bool suppress_message_printing(int level) { return false; }
#endif /* CONFIG_PRINTK */
@@ -2557,34 +2614,6 @@ int is_console_locked(void)
}
EXPORT_SYMBOL(is_console_locked);
-/*
- * Check if we have any console that is capable of printing while cpu is
- * booting or shutting down. Requires console_sem.
- */
-static int have_callable_console(void)
-{
- struct console *con;
-
- for_each_console(con)
- if ((con->flags & CON_ENABLED) &&
- (con->flags & CON_ANYTIME))
- return 1;
-
- return 0;
-}
-
-/*
- * Can we actually use the console at this time on this cpu?
- *
- * Console drivers may assume that per-cpu resources have been allocated. So
- * unless they're explicitly marked as being able to cope (CON_ANYTIME) don't
- * call them until this CPU is officially up.
- */
-static inline int can_use_console(void)
-{
- return cpu_online(raw_smp_processor_id()) || have_callable_console();
-}
-
/**
* console_unlock - unlock the console system
*
@@ -2601,140 +2630,13 @@ static inline int can_use_console(void)
*/
void console_unlock(void)
{
- static char ext_text[CONSOLE_EXT_LOG_MAX];
- static char text[CONSOLE_LOG_MAX];
- unsigned long flags;
- bool do_cond_resched, retry;
- struct printk_info info;
- struct printk_record r;
- u64 __maybe_unused next_seq;
-
if (console_suspended) {
up_console_sem();
return;
}
- prb_rec_init_rd(&r, &info, text, sizeof(text));
-
- /*
- * Console drivers are called with interrupts disabled, so
- * @console_may_schedule should be cleared before; however, we may
- * end up dumping a lot of lines, for example, if called from
- * console registration path, and should invoke cond_resched()
- * between lines if allowable. Not doing so can cause a very long
- * scheduling stall on a slow console leading to RCU stall and
- * softlockup warnings which exacerbate the issue with more
- * messages practically incapacitating the system.
- *
- * console_trylock() is not able to detect the preemptive
- * context reliably. Therefore the value must be stored before
- * and cleared after the "again" goto label.
- */
- do_cond_resched = console_may_schedule;
-again:
- console_may_schedule = 0;
-
- /*
- * We released the console_sem lock, so we need to recheck if
- * cpu is online and (if not) is there at least one CON_ANYTIME
- * console.
- */
- if (!can_use_console()) {
- console_locked = 0;
- up_console_sem();
- return;
- }
-
- for (;;) {
- size_t ext_len = 0;
- int handover;
- size_t len;
-
-skip:
- if (!prb_read_valid(prb, console_seq, &r))
- break;
-
- if (console_seq != r.info->seq) {
- console_dropped += r.info->seq - console_seq;
- console_seq = r.info->seq;
- }
-
- if (suppress_message_printing(r.info->level)) {
- /*
- * Skip record we have buffered and already printed
- * directly to the console when we received it, and
- * record that has level above the console loglevel.
- */
- console_seq++;
- goto skip;
- }
-
- /* Output to all consoles once old messages replayed. */
- if (unlikely(exclusive_console &&
- console_seq >= exclusive_console_stop_seq)) {
- exclusive_console = NULL;
- }
-
- /*
- * Handle extended console text first because later
- * record_print_text() will modify the record buffer in-place.
- */
- if (nr_ext_console_drivers) {
- ext_len = info_print_ext_header(ext_text,
- sizeof(ext_text),
- r.info);
- ext_len += msg_print_ext_body(ext_text + ext_len,
- sizeof(ext_text) - ext_len,
- &r.text_buf[0],
- r.info->text_len,
- &r.info->dev_info);
- }
- len = record_print_text(&r,
- console_msg_format & MSG_FORMAT_SYSLOG,
- printk_time);
- console_seq++;
-
- /*
- * While actively printing out messages, if another printk()
- * were to occur on another CPU, it may wait for this one to
- * finish. This task can not be preempted if there is a
- * waiter waiting to take over.
- *
- * Interrupts are disabled because the hand over to a waiter
- * must not be interrupted until the hand over is completed
- * (@console_waiter is cleared).
- */
- printk_safe_enter_irqsave(flags);
- console_lock_spinning_enable();
-
- stop_critical_timings(); /* don't trace print latency */
- call_console_drivers(ext_text, ext_len, text, len);
- start_critical_timings();
-
- handover = console_lock_spinning_disable_and_check();
- printk_safe_exit_irqrestore(flags);
- if (handover)
- return;
-
- if (do_cond_resched)
- cond_resched();
- }
-
- /* Get consistent value of the next-to-be-used sequence number. */
- next_seq = console_seq;
-
console_locked = 0;
up_console_sem();
-
- /*
- * Someone could have filled up the buffer again, so re-check if there's
- * something to flush. In case we cannot trylock the console_sem again,
- * there's a new owner and the console_unlock() from them will do the
- * flush, no worries.
- */
- retry = prb_read_valid(prb, next_seq, NULL);
- if (retry && console_trylock())
- goto again;
}
EXPORT_SYMBOL(console_unlock);
@@ -2784,18 +2686,20 @@ void console_unblank(void)
*/
void console_flush_on_panic(enum con_flush_mode mode)
{
- /*
- * If someone else is holding the console lock, trylock will fail
- * and may_schedule may be set. Ignore and proceed to unlock so
- * that messages are flushed out. As this can be called from any
- * context and we don't want to get preempted while flushing,
- * ensure may_schedule is cleared.
- */
- console_trylock();
- console_may_schedule = 0;
+ if (!console_trylock())
+ return;
+
+#ifdef CONFIG_PRINTK
+ if (mode == CONSOLE_REPLAY_ALL) {
+ struct console *c;
+ u64 seq;
+
+ seq = prb_first_valid_seq(prb);
+ for_each_console(c)
+ latched_seq_write(&c->printk_seq, seq);
+ }
+#endif
- if (mode == CONSOLE_REPLAY_ALL)
- console_seq = prb_first_valid_seq(prb);
console_unlock();
}
@@ -2931,6 +2835,7 @@ static int try_enable_new_console(struct console *newcon, bool user_specified)
void register_console(struct console *newcon)
{
struct console *bcon = NULL;
+ u64 __maybe_unused seq = 0;
int err;
for_each_console(bcon) {
@@ -2953,6 +2858,8 @@ void register_console(struct console *newcon)
}
}
+ newcon->thread = NULL;
+
if (console_drivers && console_drivers->flags & CON_BOOT)
bcon = console_drivers;
@@ -2994,8 +2901,10 @@ void register_console(struct console *newcon)
* the real console are the same physical device, it's annoying to
* see the beginning boot messages twice
*/
- if (bcon && ((newcon->flags & (CON_CONSDEV | CON_BOOT)) == CON_CONSDEV))
+ if (bcon && ((newcon->flags & (CON_CONSDEV | CON_BOOT)) == CON_CONSDEV)) {
newcon->flags &= ~CON_PRINTBUFFER;
+ newcon->flags |= CON_HANDOVER;
+ }
/*
* Put this console in the list - keep the
@@ -3017,27 +2926,21 @@ void register_console(struct console *newcon)
if (newcon->flags & CON_EXTENDED)
nr_ext_console_drivers++;
- if (newcon->flags & CON_PRINTBUFFER) {
- /*
- * console_unlock(); will print out the buffered messages
- * for us.
- *
- * We're about to replay the log buffer. Only do this to the
- * just-registered console to avoid excessive message spam to
- * the already-registered consoles.
- *
- * Set exclusive_console with disabled interrupts to reduce
- * race window with eventual console_flush_on_panic() that
- * ignores console_lock.
- */
- exclusive_console = newcon;
- exclusive_console_stop_seq = console_seq;
+#ifdef CONFIG_PRINTK
+ if (!(newcon->flags & CON_PRINTBUFFER))
+ seq = prb_next_seq(prb);
- /* Get a consistent copy of @syslog_seq. */
- mutex_lock(&syslog_lock);
- console_seq = syslog_seq;
- mutex_unlock(&syslog_lock);
- }
+ seqcount_latch_init(&newcon->printk_seq.latch);
+ latched_seq_write(&newcon->printk_seq, seq);
+ seqcount_latch_init(&newcon->printk_sync_seq.latch);
+ latched_seq_write(&newcon->printk_sync_seq, seq);
+#ifdef CONFIG_HAVE_NMI
+ seqcount_latch_init(&newcon->printk_sync_nmi_seq.latch);
+ latched_seq_write(&newcon->printk_sync_nmi_seq, seq);
+#endif
+
+ console_try_thread(newcon);
+#endif /* CONFIG_PRINTK */
console_unlock();
console_sysfs_notify();
@@ -3111,6 +3014,9 @@ int unregister_console(struct console *console)
console_unlock();
console_sysfs_notify();
+ if (console->thread && !IS_ERR(console->thread))
+ kthread_stop(console->thread);
+
if (console->exit)
res = console->exit(console);
@@ -3193,6 +3099,15 @@ static int __init printk_late_init(void)
unregister_console(con);
}
}
+
+#ifdef CONFIG_PRINTK
+ console_lock();
+ for_each_console(con)
+ start_printk_kthread(con);
+ kthreads_started = true;
+ console_unlock();
+#endif
+
ret = cpuhp_setup_state_nocalls(CPUHP_PRINTK_DEAD, "printk:dead", NULL,
console_cpu_notify);
WARN_ON(ret < 0);
@@ -3208,7 +3123,6 @@ late_initcall(printk_late_init);
* Delayed printk version, for scheduler-internal messages:
*/
#define PRINTK_PENDING_WAKEUP 0x01
-#define PRINTK_PENDING_OUTPUT 0x02
static DEFINE_PER_CPU(int, printk_pending);
@@ -3216,14 +3130,8 @@ static void wake_up_klogd_work_func(struct irq_work *irq_work)
{
int pending = __this_cpu_xchg(printk_pending, 0);
- if (pending & PRINTK_PENDING_OUTPUT) {
- /* If trylock fails, someone else is doing the printing */
- if (console_trylock())
- console_unlock();
- }
-
if (pending & PRINTK_PENDING_WAKEUP)
- wake_up_interruptible(&log_wait);
+ wake_up_interruptible_all(&log_wait);
}
static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) =
@@ -3242,39 +3150,6 @@ void wake_up_klogd(void)
preempt_enable();
}
-void defer_console_output(void)
-{
- if (!printk_percpu_data_ready())
- return;
-
- preempt_disable();
- __this_cpu_or(printk_pending, PRINTK_PENDING_OUTPUT);
- irq_work_queue(this_cpu_ptr(&wake_up_klogd_work));
- preempt_enable();
-}
-
-int vprintk_deferred(const char *fmt, va_list args)
-{
- int r;
-
- r = vprintk_emit(0, LOGLEVEL_SCHED, NULL, fmt, args);
- defer_console_output();
-
- return r;
-}
-
-int _printk_deferred(const char *fmt, ...)
-{
- va_list args;
- int r;
-
- va_start(args, fmt);
- r = vprintk_deferred(fmt, args);
- va_end(args);
-
- return r;
-}
-
/*
* printk rate limiting, lifted from the networking subsystem.
*
@@ -3401,6 +3276,24 @@ void kmsg_dump(enum kmsg_dump_reason reason)
{
struct kmsg_dumper *dumper;
+ if (!oops_in_progress) {
+ /*
+ * If atomic consoles are available, activate kernel sync mode
+ * to make sure any final messages are visible. The trailing
+ * printk message below is important for flushing any pending messages.
+ */
+ if (have_atomic_console()) {
+ sync_mode = true;
+ pr_info("enabled sync mode\n");
+ }
+
+ /*
+ * Give the printing threads time to flush, allowing up to
+ * 1s of no printing forward progress before giving up.
+ */
+ pr_flush(1000, true);
+ }
+
rcu_read_lock();
list_for_each_entry_rcu(dumper, &dump_list, list) {
enum kmsg_dump_reason max_reason = dumper->max_reason;
@@ -3583,6 +3476,7 @@ EXPORT_SYMBOL_GPL(kmsg_dump_rewind);
#ifdef CONFIG_SMP
static atomic_t printk_cpulock_owner = ATOMIC_INIT(-1);
static atomic_t printk_cpulock_nested = ATOMIC_INIT(0);
+static unsigned int kgdb_cpu = -1;
/**
* __printk_wait_on_cpu_lock() - Busy wait until the printk cpu-reentrant
@@ -3662,6 +3556,9 @@ EXPORT_SYMBOL(__printk_cpu_trylock);
*/
void __printk_cpu_unlock(void)
{
+ bool trigger_kgdb = false;
+ unsigned int cpu;
+
if (atomic_read(&printk_cpulock_nested)) {
atomic_dec(&printk_cpulock_nested);
return;
@@ -3672,6 +3569,12 @@ void __printk_cpu_unlock(void)
* LMM(__printk_cpu_unlock:A)
*/
+ cpu = smp_processor_id();
+ if (kgdb_cpu == cpu) {
+ trigger_kgdb = true;
+ kgdb_cpu = -1;
+ }
+
/*
* Guarantee loads and stores from this CPU when it was the
* lock owner are visible to the next lock owner. This pairs
@@ -3692,6 +3595,98 @@ void __printk_cpu_unlock(void)
*/
atomic_set_release(&printk_cpulock_owner,
-1); /* LMM(__printk_cpu_unlock:B) */
+
+ if (trigger_kgdb) {
+ pr_warn("re-triggering kgdb roundup for CPU#%d\n", cpu);
+ kgdb_roundup_cpu(cpu);
+ }
}
EXPORT_SYMBOL(__printk_cpu_unlock);
+
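+/*
+ * If @cpu currently owns the printk cpu-reentrant lock, delay its kgdb
+ * roundup; __printk_cpu_unlock() re-triggers the roundup once the lock
+ * is dropped.
+ */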
+bool kgdb_roundup_delay(unsigned int cpu)
+{
+ if (cpu != atomic_read(&printk_cpulock_owner))
+ return false;
+
+ kgdb_cpu = cpu;
+ return true;
+}
+EXPORT_SYMBOL(kgdb_roundup_delay);
#endif /* CONFIG_SMP */
+
+#ifdef CONFIG_PRINTK
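+/* msleep() when allowed; otherwise busy-wait so atomic callers can still delay. */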
+static void pr_msleep(bool may_sleep, int ms)
+{
+ if (may_sleep) {
+ msleep(ms);
+ } else {
+ while (ms--)
+ udelay(1000);
+ }
+}
+
+/**
+ * pr_flush() - Wait for printing threads to catch up.
+ *
+ * @timeout_ms: The maximum time (in ms) to wait.
+ * @reset_on_progress: Reset the timeout if forward progress is seen.
+ *
+ * A value of 0 for @timeout_ms means no waiting will occur. A value of -1
+ * represents infinite waiting.
+ *
+ * If @reset_on_progress is true, the timeout will be reset whenever any
+ * printer has been seen to make some forward progress.
+ *
+ * Context: Any context.
+ * Return: true if all enabled printers are caught up.
+ */
+bool pr_flush(int timeout_ms, bool reset_on_progress)
+{
+ int remaining = timeout_ms;
+ struct console *con;
+ u64 last_diff = 0;
+ bool may_sleep;
+ u64 printk_seq;
+ u64 diff;
+ u64 seq;
+
+ may_sleep = (preemptible() &&
+ !in_softirq() &&
+ system_state >= SYSTEM_RUNNING);
+
+ seq = prb_next_seq(prb);
+
+ for (;;) {
+ diff = 0;
+
+ for_each_console(con) {
+ if (!(con->flags & CON_ENABLED))
+ continue;
+ printk_seq = read_console_seq(con);
+ if (printk_seq < seq)
+ diff += seq - printk_seq;
+ }
+
+ if (diff != last_diff && reset_on_progress)
+ remaining = timeout_ms;
+
+ if (diff == 0 || remaining == 0)
+ break;
+
+ if (remaining < 0) {
+ pr_msleep(may_sleep, 100);
+ } else if (remaining < 100) {
+ pr_msleep(may_sleep, remaining);
+ remaining = 0;
+ } else {
+ pr_msleep(may_sleep, 100);
+ remaining -= 100;
+ }
+
+ last_diff = diff;
+ }
+
+ return (diff == 0);
+}
+EXPORT_SYMBOL(pr_flush);
+#endif /* CONFIG_PRINTK */
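[Not part of the diff: a minimal sketch of how a caller might use the new
pr_flush() interface declared above. The shutdown call site is an assumption
for illustration only.]

	/* Give the per-console printing threads up to 1s to catch up. */
	static void example_before_poweroff(void)
	{
		pr_info("system is going down\n");

		/*
		 * Wait until every enabled console has printed all pending
		 * records; reset_on_progress=true restarts the 1000ms budget
		 * whenever any console makes forward progress.
		 */
		if (!pr_flush(1000, true))
			pr_warn("consoles did not catch up in time\n");
	}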
diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c
deleted file mode 100644
index ef0f9a2044da1..0000000000000
--- a/kernel/printk/printk_safe.c
+++ /dev/null
@@ -1,52 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * printk_safe.c - Safe printk for printk-deadlock-prone contexts
- */
-
-#include <linux/preempt.h>
-#include <linux/kdb.h>
-#include <linux/smp.h>
-#include <linux/cpumask.h>
-#include <linux/printk.h>
-#include <linux/kprobes.h>
-
-#include "internal.h"
-
-static DEFINE_PER_CPU(int, printk_context);
-
-/* Can be preempted by NMI. */
-void __printk_safe_enter(void)
-{
- this_cpu_inc(printk_context);
-}
-
-/* Can be preempted by NMI. */
-void __printk_safe_exit(void)
-{
- this_cpu_dec(printk_context);
-}
-
-asmlinkage int vprintk(const char *fmt, va_list args)
-{
-#ifdef CONFIG_KGDB_KDB
- /* Allow to pass printk() to kdb but avoid a recursion. */
- if (unlikely(kdb_trap_printk && kdb_printf_cpu < 0))
- return vkdb_printf(KDB_MSGSRC_PRINTK, fmt, args);
-#endif
-
- /*
- * Use the main logbuf even in NMI. But avoid calling console
- * drivers that might have their own locks.
- */
- if (this_cpu_read(printk_context) || in_nmi()) {
- int len;
-
- len = vprintk_store(0, LOGLEVEL_DEFAULT, NULL, fmt, args);
- defer_console_output();
- return len;
- }
-
- /* No obstacles. */
- return vprintk_default(fmt, args);
-}
-EXPORT_SYMBOL(vprintk);
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index f8589bf8d7dce..df08e8e64a83f 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -197,7 +197,18 @@ static bool ptrace_freeze_traced(struct task_struct *task)
spin_lock_irq(&task->sighand->siglock);
if (task_is_traced(task) && !looks_like_a_spurious_pid(task) &&
!__fatal_signal_pending(task)) {
+#ifdef CONFIG_PREEMPT_RT
+ unsigned long flags;
+
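+ /*
+ * On RT the task may be blocked on a sleeping spinlock, keeping its
+ * real state in ->saved_state. Freeze the traced state wherever it
+ * currently lives: in ->__state, or in ->saved_state.
+ */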
+ raw_spin_lock_irqsave(&task->pi_lock, flags);
+ if (READ_ONCE(task->__state) & __TASK_TRACED)
+ WRITE_ONCE(task->__state, __TASK_TRACED);
+ else
+ task->saved_state = __TASK_TRACED;
+ raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+#else
WRITE_ONCE(task->__state, __TASK_TRACED);
+#endif
ret = true;
}
spin_unlock_irq(&task->sighand->siglock);
@@ -207,7 +218,11 @@ static bool ptrace_freeze_traced(struct task_struct *task)
static void ptrace_unfreeze_traced(struct task_struct *task)
{
- if (READ_ONCE(task->__state) != __TASK_TRACED)
+ unsigned long flags;
+ bool frozen = true;
+
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT) &&
+ READ_ONCE(task->__state) != __TASK_TRACED)
return;
WARN_ON(!task->ptrace || task->parent != current);
@@ -217,12 +232,21 @@ static void ptrace_unfreeze_traced(struct task_struct *task)
* Recheck state under the lock to close this race.
*/
spin_lock_irq(&task->sighand->siglock);
- if (READ_ONCE(task->__state) == __TASK_TRACED) {
- if (__fatal_signal_pending(task))
- wake_up_state(task, __TASK_TRACED);
- else
- WRITE_ONCE(task->__state, TASK_TRACED);
- }
+ raw_spin_lock_irqsave(&task->pi_lock, flags);
+ if (READ_ONCE(task->__state) == __TASK_TRACED)
+ WRITE_ONCE(task->__state, TASK_TRACED);
+
+#ifdef CONFIG_PREEMPT_RT
+ else if (task->saved_state == __TASK_TRACED)
+ task->saved_state = TASK_TRACED;
+#endif
+ else
+ frozen = false;
+ raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+
+ if (frozen && __fatal_signal_pending(task))
+ wake_up_state(task, __TASK_TRACED);
+
spin_unlock_irq(&task->sighand->siglock);
}
diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
index 7da3c81c3f59c..7f9d3df358543 100644
--- a/kernel/rcu/tasks.h
+++ b/kernel/rcu/tasks.h
@@ -1345,7 +1345,7 @@ static void test_rcu_tasks_callback(struct rcu_head *rhp)
rttd->notrun = true;
}
-static void rcu_tasks_initiate_self_tests(void)
+void rcu_tasks_initiate_self_tests(void)
{
pr_info("Running RCU-tasks wait API self tests\n");
#ifdef CONFIG_TASKS_RCU
@@ -1382,9 +1382,7 @@ static int rcu_tasks_verify_self_tests(void)
return ret;
}
late_initcall(rcu_tasks_verify_self_tests);
-#else /* #ifdef CONFIG_PROVE_RCU */
-static void rcu_tasks_initiate_self_tests(void) { }
-#endif /* #else #ifdef CONFIG_PROVE_RCU */
+#endif /* #ifdef CONFIG_PROVE_RCU */
void __init rcu_init_tasks_generic(void)
{
@@ -1399,9 +1397,6 @@ void __init rcu_init_tasks_generic(void)
#ifdef CONFIG_TASKS_TRACE_RCU
rcu_spawn_tasks_trace_kthread();
#endif
-
- // Run the self-tests.
- rcu_tasks_initiate_self_tests();
}
#else /* #ifdef CONFIG_TASKS_RCU_GENERIC */
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index ef8d36f580fc3..44fb12fc7b82a 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -2276,13 +2276,13 @@ rcu_report_qs_rdp(struct rcu_data *rdp)
{
unsigned long flags;
unsigned long mask;
- bool needwake = false;
- const bool offloaded = rcu_rdp_is_offloaded(rdp);
+ bool offloaded, needwake = false;
struct rcu_node *rnp;
WARN_ON_ONCE(rdp->cpu != smp_processor_id());
rnp = rdp->mynode;
raw_spin_lock_irqsave_rcu_node(rnp, flags);
+ offloaded = rcu_rdp_is_offloaded(rdp);
if (rdp->cpu_no_qs.b.norm || rdp->gp_seq != rnp->gp_seq ||
rdp->gpwrap) {
@@ -2444,7 +2444,7 @@ static void rcu_do_batch(struct rcu_data *rdp)
int div;
bool __maybe_unused empty;
unsigned long flags;
- const bool offloaded = rcu_rdp_is_offloaded(rdp);
+ bool offloaded;
struct rcu_head *rhp;
struct rcu_cblist rcl = RCU_CBLIST_INITIALIZER(rcl);
long bl, count = 0;
@@ -2470,6 +2470,7 @@ static void rcu_do_batch(struct rcu_data *rdp)
rcu_nocb_lock(rdp);
WARN_ON_ONCE(cpu_is_offline(smp_processor_id()));
pending = rcu_segcblist_n_cbs(&rdp->cblist);
+ offloaded = rcu_rdp_is_offloaded(rdp);
div = READ_ONCE(rcu_divisor);
div = div < 0 ? 7 : div > sizeof(long) * 8 - 2 ? sizeof(long) * 8 - 2 : div;
bl = max(rdp->blimit, pending >> div);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 3c9b0fda64ac0..4753532ff8fa4 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -986,6 +986,46 @@ void resched_curr(struct rq *rq)
trace_sched_wake_idle_without_ipi(cpu);
}
+#ifdef CONFIG_PREEMPT_LAZY
+
+static int tsk_is_polling(struct task_struct *p)
+{
+#ifdef TIF_POLLING_NRFLAG
+ return test_tsk_thread_flag(p, TIF_POLLING_NRFLAG);
+#else
+ return 0;
+#endif
+}
+
+void resched_curr_lazy(struct rq *rq)
+{
+ struct task_struct *curr = rq->curr;
+ int cpu;
+
+ if (!sched_feat(PREEMPT_LAZY)) {
+ resched_curr(rq);
+ return;
+ }
+
+ if (test_tsk_need_resched(curr))
+ return;
+
+ if (test_tsk_need_resched_lazy(curr))
+ return;
+
+ set_tsk_need_resched_lazy(curr);
+
+ cpu = cpu_of(rq);
+ if (cpu == smp_processor_id())
+ return;
+
+ /* NEED_RESCHED_LAZY must be visible before we test polling */
+ smp_mb();
+ if (!tsk_is_polling(curr))
+ smp_send_reschedule(cpu);
+}
+#endif
+
void resched_cpu(int cpu)
{
struct rq *rq = cpu_rq(cpu);
@@ -2160,6 +2200,7 @@ void migrate_disable(void)
preempt_disable();
this_rq()->nr_pinned++;
p->migration_disabled = 1;
+ preempt_lazy_disable();
preempt_enable();
}
EXPORT_SYMBOL_GPL(migrate_disable);
@@ -2171,6 +2212,8 @@ void migrate_enable(void)
if (p->migration_disabled > 1) {
p->migration_disabled--;
return;
+ } else if (WARN_ON_ONCE(p->migration_disabled == 0)) {
+ return;
}
/*
@@ -2188,6 +2231,7 @@ void migrate_enable(void)
barrier();
p->migration_disabled = 0;
this_rq()->nr_pinned--;
+ preempt_lazy_enable();
preempt_enable();
}
EXPORT_SYMBOL_GPL(migrate_enable);
@@ -2967,9 +3011,8 @@ void force_compatible_cpus_allowed_ptr(struct task_struct *p)
out_set_mask:
if (printk_ratelimit()) {
- printk_deferred("Overriding affinity for process %d (%s) to CPUs %*pbl\n",
- task_pid_nr(p), p->comm,
- cpumask_pr_args(override_mask));
+ printk("Overriding affinity for process %d (%s) to CPUs %*pbl\n",
+ task_pid_nr(p), p->comm, cpumask_pr_args(override_mask));
}
WARN_ON(set_cpus_allowed_ptr(p, override_mask));
@@ -3225,7 +3268,7 @@ unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state
* is actually now running somewhere else!
*/
while (task_running(rq, p)) {
- if (match_state && unlikely(READ_ONCE(p->__state) != match_state))
+ if (match_state && !task_match_state_lock(p, match_state))
return 0;
cpu_relax();
}
@@ -3240,7 +3283,7 @@ unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state
running = task_running(rq, p);
queued = task_on_rq_queued(p);
ncsw = 0;
- if (!match_state || READ_ONCE(p->__state) == match_state)
+ if (!match_state || task_match_state_or_saved(p, match_state))
ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
task_rq_unlock(rq, p, &rf);
@@ -3399,8 +3442,8 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
* leave kernel.
*/
if (p->mm && printk_ratelimit()) {
- printk_deferred("process %d (%s) no longer affine to cpu%d\n",
- task_pid_nr(p), p->comm, cpu);
+ printk("process %d (%s) no longer affine to cpu%d\n",
+ task_pid_nr(p), p->comm, cpu);
}
}
@@ -4418,6 +4461,9 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
p->on_cpu = 0;
#endif
init_task_preempt_count(p);
+#ifdef CONFIG_HAVE_PREEMPT_LAZY
+ task_thread_info(p)->preempt_lazy_count = 0;
+#endif
#ifdef CONFIG_SMP
plist_node_init(&p->pushable_tasks, MAX_PRIO);
RB_CLEAR_NODE(&p->pushable_dl_tasks);
@@ -4880,8 +4926,11 @@ static struct rq *finish_task_switch(struct task_struct *prev)
if (prev->sched_class->task_dead)
prev->sched_class->task_dead(prev);
- /* Task is done with its stack. */
- put_task_stack(prev);
+ /*
+ * Cache only the VMAP stack. The final deallocation is in
+ * delayed_put_task_struct.
+ */
+ put_task_stack_sched(prev);
put_task_struct_rcu_user(prev);
}
@@ -6216,6 +6265,7 @@ static void __sched notrace __schedule(unsigned int sched_mode)
next = pick_next_task(rq, prev, &rf);
clear_tsk_need_resched(prev);
+ clear_tsk_need_resched_lazy(prev);
clear_preempt_need_resched();
#ifdef CONFIG_SCHED_DEBUG
rq->last_seen_need_resched_ns = 0;
@@ -6427,6 +6477,30 @@ static void __sched notrace preempt_schedule_common(void)
} while (need_resched());
}
+#ifdef CONFIG_PREEMPT_LAZY
+/*
+ * If TIF_NEED_RESCHED is set, we allow the task to be scheduled away, since
+ * that flag is set by an RT task. Otherwise we try to avoid being scheduled
+ * out as long as the preempt_lazy_count counter is > 0.
+ */
+static __always_inline int preemptible_lazy(void)
+{
+ if (test_thread_flag(TIF_NEED_RESCHED))
+ return 1;
+ if (current_thread_info()->preempt_lazy_count)
+ return 0;
+ return 1;
+}
+
+#else
+
+static inline int preemptible_lazy(void)
+{
+ return 1;
+}
+
+#endif
+
#ifdef CONFIG_PREEMPTION
/*
* This is the entry point to schedule() from in-kernel preemption
@@ -6440,7 +6514,8 @@ asmlinkage __visible void __sched notrace preempt_schedule(void)
*/
if (likely(!preemptible()))
return;
-
+ if (!preemptible_lazy())
+ return;
preempt_schedule_common();
}
NOKPROBE_SYMBOL(preempt_schedule);
@@ -6473,6 +6548,9 @@ asmlinkage __visible void __sched notrace preempt_schedule_notrace(void)
if (likely(!preemptible()))
return;
+ if (!preemptible_lazy())
+ return;
+
do {
/*
* Because the function tracer can trace preempt_count_sub()
@@ -8656,7 +8734,9 @@ void __init init_idle(struct task_struct *idle, int cpu)
/* Set the preempt count _outside_ the spinlocks! */
init_idle_preempt_count(idle, cpu);
-
+#ifdef CONFIG_HAVE_PREEMPT_LAZY
+ task_thread_info(idle)->preempt_lazy_count = 0;
+#endif
/*
* The idle tasks have their own, simple scheduling class:
*/
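[Aside, not from the diff: a sketch of the PREEMPT_LAZY semantics added above.
resched_curr_lazy() and preempt_lazy_count are the patch's own primitives; the
scenario below is assumed for illustration.]

	/*
	 * A tick that finds a fair task over its slice now calls
	 * resched_curr_lazy(), which only sets TIF_NEED_RESCHED_LAZY: the
	 * task keeps running until the next scheduling point unless an RT
	 * task sets the real TIF_NEED_RESCHED. Sections that must hold off
	 * lazy preemption bump preempt_lazy_count, as migrate_disable()
	 * does above:
	 */
	migrate_disable();	/* also increments preempt_lazy_count */
	/* ... per-CPU work that tolerates interrupts but not migration ... */
	migrate_enable();	/* decrements it again */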
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index d2c072b0ef01f..65d4cc198e44c 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -800,7 +800,7 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se)
* entity.
*/
if (dl_time_before(dl_se->deadline, rq_clock(rq))) {
- printk_deferred_once("sched: DL replenish lagged too much\n");
+ printk_once("sched: DL replenish lagged too much\n");
dl_se->deadline = rq_clock(rq) + pi_of(dl_se)->dl_deadline;
dl_se->runtime = pi_of(dl_se)->dl_runtime;
}
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 6e476f6d94351..22a0d3a8c7600 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4393,7 +4393,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
ideal_runtime = sched_slice(cfs_rq, curr);
delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime;
if (delta_exec > ideal_runtime) {
- resched_curr(rq_of(cfs_rq));
+ resched_curr_lazy(rq_of(cfs_rq));
/*
* The current task ran long enough, ensure it doesn't get
* re-elected due to buddy favours.
@@ -4417,7 +4417,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
return;
if (delta > ideal_runtime)
- resched_curr(rq_of(cfs_rq));
+ resched_curr_lazy(rq_of(cfs_rq));
}
static void
@@ -4563,7 +4563,7 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
* validating it and just reschedule.
*/
if (queued) {
- resched_curr(rq_of(cfs_rq));
+ resched_curr_lazy(rq_of(cfs_rq));
return;
}
/*
@@ -4712,7 +4712,7 @@ static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec)
* hierarchy can be throttled
*/
if (!assign_cfs_rq_runtime(cfs_rq) && likely(cfs_rq->curr))
- resched_curr(rq_of(cfs_rq));
+ resched_curr_lazy(rq_of(cfs_rq));
}
static __always_inline
@@ -5475,7 +5475,7 @@ static void hrtick_start_fair(struct rq *rq, struct task_struct *p)
if (delta < 0) {
if (task_current(rq, p))
- resched_curr(rq);
+ resched_curr_lazy(rq);
return;
}
hrtick_start(rq, delta);
@@ -7172,7 +7172,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
return;
preempt:
- resched_curr(rq);
+ resched_curr_lazy(rq);
/*
* Only set the backward buddy when the current task is still
* on the rq. This can happen when a wakeup gets interleaved
@@ -11207,7 +11207,7 @@ static void task_fork_fair(struct task_struct *p)
* 'current' within the tree based on its new key value.
*/
swap(curr->vruntime, se->vruntime);
- resched_curr(rq);
+ resched_curr_lazy(rq);
}
se->vruntime -= cfs_rq->min_vruntime;
@@ -11234,7 +11234,7 @@ prio_changed_fair(struct rq *rq, struct task_struct *p, int oldprio)
*/
if (task_current(rq, p)) {
if (p->prio > oldprio)
- resched_curr(rq);
+ resched_curr_lazy(rq);
} else
check_preempt_curr(rq, p, 0);
}
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index 1cf435bbcd9ca..d5cee51819bf7 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -48,6 +48,9 @@ SCHED_FEAT(NONTASK_CAPACITY, true)
#ifdef CONFIG_PREEMPT_RT
SCHED_FEAT(TTWU_QUEUE, false)
+# ifdef CONFIG_PREEMPT_LAZY
+SCHED_FEAT(PREEMPT_LAZY, true)
+# endif
#else
/*
diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
index 1652f2bb54b79..ab54b2012469f 100644
--- a/kernel/sched/psi.c
+++ b/kernel/sched/psi.c
@@ -710,10 +710,10 @@ static void psi_group_change(struct psi_group *group, int cpu,
if (groupc->tasks[t]) {
groupc->tasks[t]--;
} else if (!psi_bug) {
- printk_deferred(KERN_ERR "psi: task underflow! cpu=%d t=%d tasks=[%u %u %u %u] clear=%x set=%x\n",
- cpu, t, groupc->tasks[0],
- groupc->tasks[1], groupc->tasks[2],
- groupc->tasks[3], clear, set);
+ pr_err("psi: task underflow! cpu=%d t=%d tasks=[%u %u %u %u] clear=%x set=%x\n",
+ cpu, t, groupc->tasks[0],
+ groupc->tasks[1], groupc->tasks[2],
+ groupc->tasks[3], clear, set);
psi_bug = 1;
}
}
@@ -779,9 +779,9 @@ static void psi_flags_change(struct task_struct *task, int clear, int set)
if (((task->psi_flags & set) ||
(task->psi_flags & clear) != clear) &&
!psi_bug) {
- printk_deferred(KERN_ERR "psi: inconsistent task state! task=%d:%s cpu=%d psi_flags=%x clear=%x set=%x\n",
- task->pid, task->comm, task_cpu(task),
- task->psi_flags, clear, set);
+ pr_err("psi: inconsistent task state! task=%d:%s cpu=%d psi_flags=%x clear=%x set=%x\n",
+ task->pid, task->comm, task_cpu(task),
+ task->psi_flags, clear, set);
psi_bug = 1;
}
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index b48baaba2fc2e..7a5948eeb57b9 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -977,7 +977,7 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
*/
if (likely(rt_b->rt_runtime)) {
rt_rq->rt_throttled = 1;
- printk_deferred_once("sched: RT throttling activated\n");
+ printk_once("sched: RT throttling activated\n");
} else {
/*
* In case we did anyway, make it go away,
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 0e66749486e75..2a8f54801263b 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2300,6 +2300,15 @@ extern void reweight_task(struct task_struct *p, int prio);
extern void resched_curr(struct rq *rq);
extern void resched_cpu(int cpu);
+#ifdef CONFIG_PREEMPT_LAZY
+extern void resched_curr_lazy(struct rq *rq);
+#else
+static inline void resched_curr_lazy(struct rq *rq)
+{
+ resched_curr(rq);
+}
+#endif
+
extern struct rt_bandwidth def_rt_bandwidth;
extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime);
diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h
index cfb0893a83d45..429182f6e7b0f 100644
--- a/kernel/sched/stats.h
+++ b/kernel/sched/stats.h
@@ -62,7 +62,7 @@ check_schedstat_required(void)
trace_sched_stat_iowait_enabled() ||
trace_sched_stat_blocked_enabled() ||
trace_sched_stat_runtime_enabled())
- printk_deferred_once("Scheduler tracepoints stat_sleep, stat_iowait, stat_blocked and stat_runtime require the kernel parameter schedstats=enable or kernel.sched_schedstats=1\n");
+ printk_once("Scheduler tracepoints stat_sleep, stat_iowait, stat_blocked and stat_runtime require the kernel parameter schedstats=enable or kernel.sched_schedstats=1\n");
}
#else /* !CONFIG_SCHEDSTATS: */
diff --git a/kernel/sched/swait.c b/kernel/sched/swait.c
index e1c655f928c74..f230b1ac7f910 100644
--- a/kernel/sched/swait.c
+++ b/kernel/sched/swait.c
@@ -64,6 +64,7 @@ void swake_up_all(struct swait_queue_head *q)
struct swait_queue *curr;
LIST_HEAD(tmp);
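+ /* swake_up_all() must not be entered with interrupts disabled. */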
+ WARN_ON(irqs_disabled());
raw_spin_lock_irq(&q->lock);
list_splice_init(&q->task_list, &tmp);
while (!list_empty(&tmp)) {
diff --git a/kernel/signal.c b/kernel/signal.c
index 7c4b7ae714d47..b0ee27674008c 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1317,6 +1317,34 @@ force_sig_info_to_task(struct kernel_siginfo *info, struct task_struct *t, bool
struct k_sigaction *action;
int sig = info->si_signo;
+ /*
+ * On some archs, PREEMPT_RT has to delay sending a signal from a trap
+ * since it cannot enable preemption, and the signal code's spin_locks
+ * turn into mutexes. Instead, it must set TIF_NOTIFY_RESUME, which
+ * sends the signal on exit from the trap.
+ */
+#ifdef ARCH_RT_DELAYS_SIGNAL_SEND
+ if (in_atomic()) {
+ struct task_struct *t = current;
+
+ if (WARN_ON_ONCE(t->forced_info.si_signo))
+ return 0;
+
+ if (is_si_special(info)) {
+ WARN_ON_ONCE(info != SEND_SIG_PRIV);
+ t->forced_info.si_signo = info->si_signo;
+ t->forced_info.si_errno = 0;
+ t->forced_info.si_code = SI_KERNEL;
+ t->forced_info.si_pid = 0;
+ t->forced_info.si_uid = 0;
+ } else {
+ t->forced_info = *info;
+ }
+
+ set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
+ return 0;
+ }
+#endif
spin_lock_irqsave(&t->sighand->siglock, flags);
action = &t->sighand->action[sig-1];
ignored = action->sa.sa_handler == SIG_IGN;
@@ -2249,16 +2277,8 @@ static void ptrace_stop(int exit_code, int why, int clear_code, kernel_siginfo_t
if (gstop_done && ptrace_reparented(current))
do_notify_parent_cldstop(current, false, why);
- /*
- * Don't want to allow preemption here, because
- * sys_ptrace() needs this task to be inactive.
- *
- * XXX: implement read_unlock_no_resched().
- */
- preempt_disable();
read_unlock(&tasklist_lock);
cgroup_enter_frozen();
- preempt_enable_no_resched();
freezable_schedule();
cgroup_leave_frozen(true);
} else {
diff --git a/kernel/smp.c b/kernel/smp.c
index 01a7c1706a58b..250311c2009fe 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -690,10 +690,20 @@ void flush_smp_call_function_from_idle(void)
cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->idle, CFD_SEQ_NOCPU,
smp_processor_id(), CFD_SEQ_IDLE);
+
local_irq_save(flags);
flush_smp_call_function_queue(true);
- if (local_softirq_pending())
- do_softirq();
+
+ if (local_softirq_pending()) {
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) {
+ do_softirq();
+ } else {
+ struct task_struct *ksoftirqd = this_cpu_ksoftirqd();
+
+ if (ksoftirqd && !task_is_running(ksoftirqd))
+ wake_up_process(ksoftirqd);
+ }
+ }
local_irq_restore(flags);
}
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 003ccf338d201..00fc43605c6b1 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -203,8 +203,7 @@ static int clockevents_increase_min_delta(struct clock_event_device *dev)
{
/* Nothing to do if we already reached the limit */
if (dev->min_delta_ns >= MIN_DELTA_LIMIT) {
- printk_deferred(KERN_WARNING
- "CE: Reprogramming failure. Giving up\n");
+ pr_warn("CE: Reprogramming failure. Giving up\n");
dev->next_event = KTIME_MAX;
return -ETIME;
}
@@ -217,10 +216,8 @@ static int clockevents_increase_min_delta(struct clock_event_device *dev)
if (dev->min_delta_ns > MIN_DELTA_LIMIT)
dev->min_delta_ns = MIN_DELTA_LIMIT;
- printk_deferred(KERN_WARNING
- "CE: %s increased min_delta_ns to %llu nsec\n",
- dev->name ? dev->name : "?",
- (unsigned long long) dev->min_delta_ns);
+ pr_warn("CE: %s increased min_delta_ns to %llu nsec\n",
+ dev->name ? dev->name : "?", (unsigned long long) dev->min_delta_ns);
return 0;
}
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 406dccb79c2b6..829d7797811fb 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -939,9 +939,7 @@ static long hardpps_update_freq(struct pps_normtime freq_norm)
time_status |= STA_PPSERROR;
pps_errcnt++;
pps_dec_freq_interval();
- printk_deferred(KERN_ERR
- "hardpps: PPSERROR: interval too long - %lld s\n",
- freq_norm.sec);
+ pr_err("hardpps: PPSERROR: interval too long - %lld s\n", freq_norm.sec);
return 0;
}
@@ -954,8 +952,7 @@ static long hardpps_update_freq(struct pps_normtime freq_norm)
delta = shift_right(ftemp - pps_freq, NTP_SCALE_SHIFT);
pps_freq = ftemp;
if (delta > PPS_MAXWANDER || delta < -PPS_MAXWANDER) {
- printk_deferred(KERN_WARNING
- "hardpps: PPSWANDER: change=%ld\n", delta);
+ pr_warn("hardpps: PPSWANDER: change=%ld\n", delta);
time_status |= STA_PPSWANDER;
pps_stbcnt++;
pps_dec_freq_interval();
@@ -999,9 +996,8 @@ static void hardpps_update_phase(long error)
* the time offset is updated.
*/
if (jitter > (pps_jitter << PPS_POPCORN)) {
- printk_deferred(KERN_WARNING
- "hardpps: PPSJITTER: jitter=%ld, limit=%ld\n",
- jitter, (pps_jitter << PPS_POPCORN));
+ pr_warn("hardpps: PPSJITTER: jitter=%ld, limit=%ld\n",
+ jitter, (pps_jitter << PPS_POPCORN));
time_status |= STA_PPSJITTER;
pps_jitcnt++;
} else if (time_status & STA_PPSTIME) {
@@ -1058,7 +1054,7 @@ void __hardpps(const struct timespec64 *phase_ts, const struct timespec64 *raw_t
time_status |= STA_PPSJITTER;
/* restart the frequency calibration interval */
pps_fbase = *raw_ts;
- printk_deferred(KERN_ERR "hardpps: PPSJITTER: bad pulse\n");
+ pr_err("hardpps: PPSJITTER: bad pulse\n");
return;
}
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index b348749a9fc62..a81beb3120386 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -203,22 +203,23 @@ static void timekeeping_check_update(struct timekeeper *tk, u64 offset)
const char *name = tk->tkr_mono.clock->name;
if (offset > max_cycles) {
- printk_deferred("WARNING: timekeeping: Cycle offset (%lld) is larger than allowed by the '%s' clock's max_cycles value (%lld): time overflow danger\n",
- offset, name, max_cycles);
- printk_deferred(" timekeeping: Your kernel is sick, but tries to cope by capping time updates\n");
+ printk("WARNING: timekeeping: Cycle offset (%lld) is larger than allowed by the '%s' clock's max_cycles value (%lld): time overflow danger\n",
+ offset, name, max_cycles);
+ printk(" timekeeping: Your kernel is sick, but tries to cope by capping time updates\n");
} else {
if (offset > (max_cycles >> 1)) {
- printk_deferred("INFO: timekeeping: Cycle offset (%lld) is larger than the '%s' clock's 50%% safety margin (%lld)\n",
- offset, name, max_cycles >> 1);
- printk_deferred(" timekeeping: Your kernel is still fine, but is feeling a bit nervous\n");
+ printk("INFO: timekeeping: Cycle offset (%lld) is larger than the '%s' clock's 50%% safety margin (%lld)\n",
+ offset, name, max_cycles >> 1);
+ printk(" timekeeping: Your kernel is still fine, but is feeling a bit nervous\n");
}
}
if (tk->underflow_seen) {
if (jiffies - tk->last_warning > WARNING_FREQ) {
- printk_deferred("WARNING: Underflow in clocksource '%s' observed, time update ignored.\n", name);
- printk_deferred(" Please report this, consider using a different clocksource, if possible.\n");
- printk_deferred(" Your kernel is probably still fine.\n");
+ printk("WARNING: Underflow in clocksource '%s' observed, time update ignored.\n",
+ name);
+ printk(" Please report this, consider using a different clocksource, if possible.\n");
+ printk(" Your kernel is probably still fine.\n");
tk->last_warning = jiffies;
}
tk->underflow_seen = 0;
@@ -226,9 +227,10 @@ static void timekeeping_check_update(struct timekeeper *tk, u64 offset)
if (tk->overflow_seen) {
if (jiffies - tk->last_warning > WARNING_FREQ) {
- printk_deferred("WARNING: Overflow in clocksource '%s' observed, time update capped.\n", name);
- printk_deferred(" Please report this, consider using a different clocksource, if possible.\n");
- printk_deferred(" Your kernel is probably still fine.\n");
+ printk("WARNING: Overflow in clocksource '%s' observed, time update capped.\n",
+ name);
+ printk(" Please report this, consider using a different clocksource, if possible.\n");
+ printk(" Your kernel is probably still fine.\n");
tk->last_warning = jiffies;
}
tk->overflow_seen = 0;
@@ -1669,9 +1671,7 @@ static void __timekeeping_inject_sleeptime(struct timekeeper *tk,
const struct timespec64 *delta)
{
if (!timespec64_valid_strict(delta)) {
- printk_deferred(KERN_WARNING
- "__timekeeping_inject_sleeptime: Invalid "
- "sleep delta value!\n");
+ pr_warn("%s: Invalid sleep delta value!\n", __func__);
return;
}
tk_xtime_add(tk, delta);
diff --git a/kernel/time/timekeeping_debug.c b/kernel/time/timekeeping_debug.c
index b73e8850e58d9..149cc4b08d8eb 100644
--- a/kernel/time/timekeeping_debug.c
+++ b/kernel/time/timekeeping_debug.c
@@ -49,7 +49,7 @@ void tk_debug_account_sleep_time(const struct timespec64 *t)
int bin = min(fls(t->tv_sec), NUM_BINS-1);
sleep_time_bin[bin]++;
- pm_deferred_pr_dbg("Timekeeping suspended for %lld.%03lu seconds\n",
+ pm_pr_dbg("Timekeeping suspended for %lld.%03lu seconds\n",
(s64)t->tv_sec, t->tv_nsec / NSEC_PER_MSEC);
}
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index f9139dc1262cd..1cc0e1af3db88 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2606,7 +2606,13 @@ unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
trace_flags |= TRACE_FLAG_NEED_RESCHED;
if (test_preempt_need_resched())
trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
- return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
+#ifdef CONFIG_PREEMPT_LAZY
+ if (need_resched_lazy())
+ trace_flags |= TRACE_FLAG_NEED_RESCHED_LAZY;
+#endif
+
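+ /* Bit layout: 24+ trace flags, 16-23 lazy count, 4-7 migrate-disable, 0-3 preempt count. */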
+ return (trace_flags << 24) | (min_t(unsigned int, pc & 0xff, 0xf)) |
+ (preempt_lazy_count() & 0xff) << 16 |
(min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
}
@@ -4170,15 +4176,17 @@ unsigned long trace_total_entries(struct trace_array *tr)
static void print_lat_help_header(struct seq_file *m)
{
- seq_puts(m, "# _------=> CPU# \n"
- "# / _-----=> irqs-off \n"
- "# | / _----=> need-resched \n"
- "# || / _---=> hardirq/softirq \n"
- "# ||| / _--=> preempt-depth \n"
- "# |||| / _-=> migrate-disable \n"
- "# ||||| / delay \n"
- "# cmd pid |||||| time | caller \n"
- "# \\ / |||||| \\ | / \n");
+ seq_puts(m, "# _--------=> CPU# \n"
+ "# / _-------=> irqs-off \n"
+ "# | / _------=> need-resched \n"
+ "# || / _-----=> need-resched-lazy\n"
+ "# ||| / _----=> hardirq/softirq \n"
+ "# |||| / _---=> preempt-depth \n"
+ "# ||||| / _--=> preempt-lazy-depth\n"
+ "# |||||| / _-=> migrate-disable \n"
+ "# ||||||| / delay \n"
+ "# cmd pid |||||||| time | caller \n"
+ "# \\ / |||||||| \\ | / \n");
}
static void print_event_info(struct array_buffer *buf, struct seq_file *m)
@@ -4212,14 +4220,16 @@ static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file
print_event_info(buf, m);
- seq_printf(m, "# %.*s _-----=> irqs-off\n", prec, space);
- seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space);
- seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space);
- seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space);
- seq_printf(m, "# %.*s||| / _-=> migrate-disable\n", prec, space);
- seq_printf(m, "# %.*s|||| / delay\n", prec, space);
- seq_printf(m, "# TASK-PID %.*s CPU# ||||| TIMESTAMP FUNCTION\n", prec, " TGID ");
- seq_printf(m, "# | | %.*s | ||||| | |\n", prec, " | ");
+ seq_printf(m, "# %.*s _-------=> irqs-off\n", prec, space);
+ seq_printf(m, "# %.*s / _------=> need-resched\n", prec, space);
+ seq_printf(m, "# %.*s| / _-----=> need-resched-lazy\n", prec, space);
+ seq_printf(m, "# %.*s|| / _----=> hardirq/softirq\n", prec, space);
+ seq_printf(m, "# %.*s||| / _---=> preempt-depth\n", prec, space);
+ seq_printf(m, "# %.*s|||| / _--=> preempt-lazy-depth\n", prec, space);
+ seq_printf(m, "# %.*s||||| / _-=> migrate-disable\n", prec, space);
+ seq_printf(m, "# %.*s|||||| / delay\n", prec, space);
+ seq_printf(m, "# TASK-PID %.*s CPU# ||||||| TIMESTAMP FUNCTION\n", prec, " TGID ");
+ seq_printf(m, "# | | %.*s | ||||||| | |\n", prec, " | ");
}
void
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 4021b9a79f93f..b42fe2bef7537 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -184,6 +184,7 @@ static int trace_define_common_fields(void)
/* Holds both preempt_count and migrate_disable */
__common_field(unsigned char, preempt_count);
__common_field(int, pid);
+ __common_field(unsigned char, preempt_lazy_count);
return ret;
}
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 3547e7176ff79..2745a023173ad 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -442,6 +442,7 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry)
{
char hardsoft_irq;
char need_resched;
+ char need_resched_lazy;
char irqs_off;
int hardirq;
int softirq;
@@ -472,6 +473,9 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry)
break;
}
+ need_resched_lazy =
+ (entry->flags & TRACE_FLAG_NEED_RESCHED_LAZY) ? 'L' : '.';
+
hardsoft_irq =
(nmi && hardirq) ? 'Z' :
nmi ? 'z' :
@@ -480,14 +484,20 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry)
softirq ? 's' :
'.' ;
- trace_seq_printf(s, "%c%c%c",
- irqs_off, need_resched, hardsoft_irq);
+ trace_seq_printf(s, "%c%c%c%c",
+ irqs_off, need_resched, need_resched_lazy,
+ hardsoft_irq);
if (entry->preempt_count & 0xf)
trace_seq_printf(s, "%x", entry->preempt_count & 0xf);
else
trace_seq_putc(s, '.');
+ if (entry->preempt_lazy_count)
+ trace_seq_printf(s, "%x", entry->preempt_lazy_count);
+ else
+ trace_seq_putc(s, '.');
+
if (entry->preempt_count & 0xf0)
trace_seq_printf(s, "%x", entry->preempt_count >> 4);
else
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 613917bbc4e73..1234ac4e18bef 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -4826,9 +4826,7 @@ void show_one_workqueue(struct workqueue_struct *wq)
* drivers that queue work while holding locks
* also taken in their write paths.
*/
- printk_deferred_enter();
show_pwq(pwq);
- printk_deferred_exit();
}
raw_spin_unlock_irqrestore(&pwq->pool->lock, flags);
/*
@@ -4859,7 +4857,6 @@ static void show_one_worker_pool(struct worker_pool *pool)
* queue work while holding locks also taken in their write
* paths.
*/
- printk_deferred_enter();
pr_info("pool %d:", pool->id);
pr_cont_pool_info(pool);
pr_cont(" hung=%us workers=%d",
@@ -4874,7 +4871,6 @@ static void show_one_worker_pool(struct worker_pool *pool)
first = false;
}
pr_cont("\n");
- printk_deferred_exit();
next_pool:
raw_spin_unlock_irqrestore(&pool->lock, flags);
/*
diff --git a/lib/bug.c b/lib/bug.c
index 45a0584f65417..03a87df69ed23 100644
--- a/lib/bug.c
+++ b/lib/bug.c
@@ -206,6 +206,7 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs)
else
pr_crit("Kernel BUG at %pB [verbose debug info unavailable]\n",
(void *)bugaddr);
+ pr_flush(1000, true);
return BUG_TRAP_TYPE_BUG;
}
diff --git a/lib/dump_stack.c b/lib/dump_stack.c
index 6b7f1bf6715d8..6e8ae42c7e273 100644
--- a/lib/dump_stack.c
+++ b/lib/dump_stack.c
@@ -102,9 +102,9 @@ asmlinkage __visible void dump_stack_lvl(const char *log_lvl)
* Permit this cpu to perform nested stack dumps while serialising
* against other CPUs
*/
- printk_cpu_lock_irqsave(flags);
+ raw_printk_cpu_lock_irqsave(flags);
__dump_stack(log_lvl);
- printk_cpu_unlock_irqrestore(flags);
+ raw_printk_cpu_unlock_irqrestore(flags);
}
EXPORT_SYMBOL(dump_stack_lvl);
diff --git a/lib/irq_poll.c b/lib/irq_poll.c
index 2f17b488d58e1..2b9f797642f60 100644
--- a/lib/irq_poll.c
+++ b/lib/irq_poll.c
@@ -191,11 +191,13 @@ static int irq_poll_cpu_dead(unsigned int cpu)
* If a CPU goes away, splice its entries to the current CPU
* and trigger a run of the softirq
*/
+ local_bh_disable();
local_irq_disable();
list_splice_init(&per_cpu(blk_cpu_iopoll, cpu),
this_cpu_ptr(&blk_cpu_iopoll));
__raise_softirq_irqoff(IRQ_POLL_SOFTIRQ);
local_irq_enable();
+ local_bh_enable();
return 0;
}
diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c
index 71652e1c397cf..8d24279fad055 100644
--- a/lib/locking-selftest.c
+++ b/lib/locking-selftest.c
@@ -26,6 +26,12 @@
#include <linux/rtmutex.h>
#include <linux/local_lock.h>
+#ifdef CONFIG_PREEMPT_RT
+# define NON_RT(...)
+#else
+# define NON_RT(...) __VA_ARGS__
+#endif
+
/*
* Change this to 1 if you want to see the failure printouts:
*/
@@ -139,7 +145,7 @@ static DEFINE_RT_MUTEX(rtmutex_Z2);
#endif
-static local_lock_t local_A = INIT_LOCAL_LOCK(local_A);
+static DEFINE_PER_CPU(local_lock_t, local_A);
/*
* non-inlined runtime initializers, to let separate locks share
@@ -712,12 +718,18 @@ GENERATE_TESTCASE(ABCDBCDA_rtmutex);
#undef E
+#ifdef CONFIG_PREEMPT_RT
+# define RT_PREPARE_DBL_UNLOCK() { migrate_disable(); rcu_read_lock(); }
+#else
+# define RT_PREPARE_DBL_UNLOCK()
+#endif
/*
* Double unlock:
*/
#define E() \
\
LOCK(A); \
+ RT_PREPARE_DBL_UNLOCK(); \
UNLOCK(A); \
UNLOCK(A); /* fail */
@@ -802,6 +814,7 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_hard_rlock)
#include "locking-selftest-wlock-hardirq.h"
GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_hard_wlock)
+#ifndef CONFIG_PREEMPT_RT
#include "locking-selftest-spin-softirq.h"
GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_soft_spin)
@@ -810,10 +823,12 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_soft_rlock)
#include "locking-selftest-wlock-softirq.h"
GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_soft_wlock)
+#endif
#undef E1
#undef E2
+#ifndef CONFIG_PREEMPT_RT
/*
* Enabling hardirqs with a softirq-safe lock held:
*/
@@ -846,6 +861,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2A_rlock)
#undef E1
#undef E2
+#endif
+
/*
* Enabling irqs with an irq-safe lock held:
*/
@@ -875,6 +892,7 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_hard_rlock)
#include "locking-selftest-wlock-hardirq.h"
GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_hard_wlock)
+#ifndef CONFIG_PREEMPT_RT
#include "locking-selftest-spin-softirq.h"
GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_spin)
@@ -883,6 +901,7 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_rlock)
#include "locking-selftest-wlock-softirq.h"
GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_wlock)
+#endif
#undef E1
#undef E2
@@ -921,6 +940,7 @@ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_hard_rlock)
#include "locking-selftest-wlock-hardirq.h"
GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_hard_wlock)
+#ifndef CONFIG_PREEMPT_RT
#include "locking-selftest-spin-softirq.h"
GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_spin)
@@ -929,6 +949,7 @@ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_rlock)
#include "locking-selftest-wlock-softirq.h"
GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_wlock)
+#endif
#undef E1
#undef E2
@@ -969,6 +990,7 @@ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_hard_rlock)
#include "locking-selftest-wlock-hardirq.h"
GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_hard_wlock)
+#ifndef CONFIG_PREEMPT_RT
#include "locking-selftest-spin-softirq.h"
GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_soft_spin)
@@ -977,6 +999,7 @@ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_soft_rlock)
#include "locking-selftest-wlock-softirq.h"
GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_soft_wlock)
+#endif
#undef E1
#undef E2
@@ -1031,6 +1054,7 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_hard_rlock)
#include "locking-selftest-wlock-hardirq.h"
GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_hard_wlock)
+#ifndef CONFIG_PREEMPT_RT
#include "locking-selftest-spin-softirq.h"
GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_soft_spin)
@@ -1039,6 +1063,7 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_soft_rlock)
#include "locking-selftest-wlock-softirq.h"
GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_soft_wlock)
+#endif
#undef E1
#undef E2
@@ -1206,12 +1231,14 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_hard_rlock)
#include "locking-selftest-wlock.h"
GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_hard_wlock)
+#ifndef CONFIG_PREEMPT_RT
#include "locking-selftest-softirq.h"
#include "locking-selftest-rlock.h"
GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_soft_rlock)
#include "locking-selftest-wlock.h"
GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_soft_wlock)
+#endif
#undef E1
#undef E2
@@ -1252,12 +1279,14 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion2_hard_rlock)
#include "locking-selftest-wlock.h"
GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion2_hard_wlock)
+#ifndef CONFIG_PREEMPT_RT
#include "locking-selftest-softirq.h"
#include "locking-selftest-rlock.h"
GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion2_soft_rlock)
#include "locking-selftest-wlock.h"
GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion2_soft_wlock)
+#endif
#undef E1
#undef E2
@@ -1306,12 +1335,14 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion3_hard_rlock)
#include "locking-selftest-wlock.h"
GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion3_hard_wlock)
+#ifndef CONFIG_PREEMPT_RT
#include "locking-selftest-softirq.h"
#include "locking-selftest-rlock.h"
GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion3_soft_rlock)
#include "locking-selftest-wlock.h"
GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion3_soft_wlock)
+#endif
#ifdef CONFIG_DEBUG_LOCK_ALLOC
# define I_SPINLOCK(x) lockdep_reset_lock(&lock_##x.dep_map)
@@ -1320,7 +1351,7 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion3_soft_wlock)
# define I_MUTEX(x) lockdep_reset_lock(&mutex_##x.dep_map)
# define I_RWSEM(x) lockdep_reset_lock(&rwsem_##x.dep_map)
# define I_WW(x) lockdep_reset_lock(&x.dep_map)
-# define I_LOCAL_LOCK(x) lockdep_reset_lock(&local_##x.dep_map)
+# define I_LOCAL_LOCK(x) lockdep_reset_lock(this_cpu_ptr(&local_##x.dep_map))
#ifdef CONFIG_RT_MUTEXES
# define I_RTMUTEX(x) lockdep_reset_lock(&rtmutex_##x.dep_map)
#endif
@@ -1380,7 +1411,7 @@ static void reset_locks(void)
init_shared_classes();
raw_spin_lock_init(&raw_lock_A);
raw_spin_lock_init(&raw_lock_B);
- local_lock_init(&local_A);
+ local_lock_init(this_cpu_ptr(&local_A));
ww_mutex_init(&o, &ww_lockdep); ww_mutex_init(&o2, &ww_lockdep); ww_mutex_init(&o3, &ww_lockdep);
memset(&t, 0, sizeof(t)); memset(&t2, 0, sizeof(t2));
@@ -1398,7 +1429,13 @@ static int unexpected_testcase_failures;
static void dotest(void (*testcase_fn)(void), int expected, int lockclass_mask)
{
- unsigned long saved_preempt_count = preempt_count();
+ int saved_preempt_count = preempt_count();
+#ifdef CONFIG_PREEMPT_RT
+#ifdef CONFIG_SMP
+ int saved_mgd_count = current->migration_disabled;
+#endif
+ int saved_rcu_count = current->rcu_read_lock_nesting;
+#endif
WARN_ON(irqs_disabled());
@@ -1432,6 +1469,18 @@ static void dotest(void (*testcase_fn)(void), int expected, int lockclass_mask)
* count, so restore it:
*/
preempt_count_set(saved_preempt_count);
+
+#ifdef CONFIG_PREEMPT_RT
+#ifdef CONFIG_SMP
+ while (current->migration_disabled > saved_mgd_count)
+ migrate_enable();
+#endif
+
+ while (current->rcu_read_lock_nesting > saved_rcu_count)
+ rcu_read_unlock();
+ WARN_ON_ONCE(current->rcu_read_lock_nesting < saved_rcu_count);
+#endif
+
#ifdef CONFIG_TRACE_IRQFLAGS
if (softirq_count())
current->softirqs_enabled = 0;
@@ -1499,7 +1548,7 @@ static inline void print_testname(const char *testname)
#define DO_TESTCASE_2x2RW(desc, name, nr) \
DO_TESTCASE_2RW("hard-"desc, name##_hard, nr) \
- DO_TESTCASE_2RW("soft-"desc, name##_soft, nr) \
+ NON_RT(DO_TESTCASE_2RW("soft-"desc, name##_soft, nr)) \
#define DO_TESTCASE_6x2x2RW(desc, name) \
DO_TESTCASE_2x2RW(desc, name, 123); \
@@ -1547,19 +1596,19 @@ static inline void print_testname(const char *testname)
#define DO_TESTCASE_2I(desc, name, nr) \
DO_TESTCASE_1("hard-"desc, name##_hard, nr); \
- DO_TESTCASE_1("soft-"desc, name##_soft, nr);
+ NON_RT(DO_TESTCASE_1("soft-"desc, name##_soft, nr));
#define DO_TESTCASE_2IB(desc, name, nr) \
DO_TESTCASE_1B("hard-"desc, name##_hard, nr); \
- DO_TESTCASE_1B("soft-"desc, name##_soft, nr);
+ NON_RT(DO_TESTCASE_1B("soft-"desc, name##_soft, nr));
#define DO_TESTCASE_6I(desc, name, nr) \
DO_TESTCASE_3("hard-"desc, name##_hard, nr); \
- DO_TESTCASE_3("soft-"desc, name##_soft, nr);
+ NON_RT(DO_TESTCASE_3("soft-"desc, name##_soft, nr));
#define DO_TESTCASE_6IRW(desc, name, nr) \
DO_TESTCASE_3RW("hard-"desc, name##_hard, nr); \
- DO_TESTCASE_3RW("soft-"desc, name##_soft, nr);
+ NON_RT(DO_TESTCASE_3RW("soft-"desc, name##_soft, nr));
#define DO_TESTCASE_2x3(desc, name) \
DO_TESTCASE_3(desc, name, 12); \
@@ -1651,6 +1700,22 @@ static void ww_test_fail_acquire(void)
#endif
}
+#ifdef CONFIG_PREEMPT_RT
+#define ww_mutex_base_lock(b) rt_mutex_lock(b)
+#define ww_mutex_base_trylock(b) rt_mutex_trylock(b)
+#define ww_mutex_base_lock_nest_lock(b, b2) rt_mutex_lock_nest_lock(b, b2)
+#define ww_mutex_base_lock_interruptible(b) rt_mutex_lock_interruptible(b)
+#define ww_mutex_base_lock_killable(b) rt_mutex_lock_killable(b)
+#define ww_mutex_base_unlock(b) rt_mutex_unlock(b)
+#else
+#define ww_mutex_base_lock(b) mutex_lock(b)
+#define ww_mutex_base_trylock(b) mutex_trylock(b)
+#define ww_mutex_base_lock_nest_lock(b, b2) mutex_lock_nest_lock(b, b2)
+#define ww_mutex_base_lock_interruptible(b) mutex_lock_interruptible(b)
+#define ww_mutex_base_lock_killable(b) mutex_lock_killable(b)
+#define ww_mutex_base_unlock(b) mutex_unlock(b)
+#endif
+
static void ww_test_normal(void)
{
int ret;
@@ -1665,50 +1730,50 @@ static void ww_test_normal(void)
/* mutex_lock (and indirectly, mutex_lock_nested) */
o.ctx = (void *)~0UL;
- mutex_lock(&o.base);
- mutex_unlock(&o.base);
+ ww_mutex_base_lock(&o.base);
+ ww_mutex_base_unlock(&o.base);
WARN_ON(o.ctx != (void *)~0UL);
/* mutex_lock_interruptible (and *_nested) */
o.ctx = (void *)~0UL;
- ret = mutex_lock_interruptible(&o.base);
+ ret = ww_mutex_base_lock_interruptible(&o.base);
if (!ret)
- mutex_unlock(&o.base);
+ ww_mutex_base_unlock(&o.base);
else
WARN_ON(1);
WARN_ON(o.ctx != (void *)~0UL);
/* mutex_lock_killable (and *_nested) */
o.ctx = (void *)~0UL;
- ret = mutex_lock_killable(&o.base);
+ ret = ww_mutex_base_lock_killable(&o.base);
if (!ret)
- mutex_unlock(&o.base);
+ ww_mutex_base_unlock(&o.base);
else
WARN_ON(1);
WARN_ON(o.ctx != (void *)~0UL);
/* trylock, succeeding */
o.ctx = (void *)~0UL;
- ret = mutex_trylock(&o.base);
+ ret = ww_mutex_base_trylock(&o.base);
WARN_ON(!ret);
if (ret)
- mutex_unlock(&o.base);
+ ww_mutex_base_unlock(&o.base);
else
WARN_ON(1);
WARN_ON(o.ctx != (void *)~0UL);
/* trylock, failing */
o.ctx = (void *)~0UL;
- mutex_lock(&o.base);
- ret = mutex_trylock(&o.base);
+ ww_mutex_base_lock(&o.base);
+ ret = ww_mutex_base_trylock(&o.base);
WARN_ON(ret);
- mutex_unlock(&o.base);
+ ww_mutex_base_unlock(&o.base);
WARN_ON(o.ctx != (void *)~0UL);
/* nest_lock */
o.ctx = (void *)~0UL;
- mutex_lock_nest_lock(&o.base, &t);
- mutex_unlock(&o.base);
+ ww_mutex_base_lock_nest_lock(&o.base, &t);
+ ww_mutex_base_unlock(&o.base);
WARN_ON(o.ctx != (void *)~0UL);
}
@@ -1721,7 +1786,7 @@ static void ww_test_two_contexts(void)
static void ww_test_diff_class(void)
{
WWAI(&t);
-#ifdef CONFIG_DEBUG_MUTEXES
+#ifdef DEBUG_WW_MUTEXES
t.ww_class = NULL;
#endif
WWL(&o, &t);
@@ -1785,7 +1850,7 @@ static void ww_test_edeadlk_normal(void)
{
int ret;
- mutex_lock(&o2.base);
+ ww_mutex_base_lock(&o2.base);
o2.ctx = &t2;
mutex_release(&o2.base.dep_map, _THIS_IP_);
@@ -1801,7 +1866,7 @@ static void ww_test_edeadlk_normal(void)
o2.ctx = NULL;
mutex_acquire(&o2.base.dep_map, 0, 1, _THIS_IP_);
- mutex_unlock(&o2.base);
+ ww_mutex_base_unlock(&o2.base);
WWU(&o);
WWL(&o2, &t);
@@ -1811,7 +1876,7 @@ static void ww_test_edeadlk_normal_slow(void)
{
int ret;
- mutex_lock(&o2.base);
+ ww_mutex_base_lock(&o2.base);
mutex_release(&o2.base.dep_map, _THIS_IP_);
o2.ctx = &t2;
@@ -1827,7 +1892,7 @@ static void ww_test_edeadlk_normal_slow(void)
o2.ctx = NULL;
mutex_acquire(&o2.base.dep_map, 0, 1, _THIS_IP_);
- mutex_unlock(&o2.base);
+ ww_mutex_base_unlock(&o2.base);
WWU(&o);
ww_mutex_lock_slow(&o2, &t);
@@ -1837,7 +1902,7 @@ static void ww_test_edeadlk_no_unlock(void)
{
int ret;
- mutex_lock(&o2.base);
+ ww_mutex_base_lock(&o2.base);
o2.ctx = &t2;
mutex_release(&o2.base.dep_map, _THIS_IP_);
@@ -1853,7 +1918,7 @@ static void ww_test_edeadlk_no_unlock(void)
o2.ctx = NULL;
mutex_acquire(&o2.base.dep_map, 0, 1, _THIS_IP_);
- mutex_unlock(&o2.base);
+ ww_mutex_base_unlock(&o2.base);
WWL(&o2, &t);
}
@@ -1862,7 +1927,7 @@ static void ww_test_edeadlk_no_unlock_slow(void)
{
int ret;
- mutex_lock(&o2.base);
+ ww_mutex_base_lock(&o2.base);
mutex_release(&o2.base.dep_map, _THIS_IP_);
o2.ctx = &t2;
@@ -1878,7 +1943,7 @@ static void ww_test_edeadlk_no_unlock_slow(void)
o2.ctx = NULL;
mutex_acquire(&o2.base.dep_map, 0, 1, _THIS_IP_);
- mutex_unlock(&o2.base);
+ ww_mutex_base_unlock(&o2.base);
ww_mutex_lock_slow(&o2, &t);
}
@@ -1887,7 +1952,7 @@ static void ww_test_edeadlk_acquire_more(void)
{
int ret;
- mutex_lock(&o2.base);
+ ww_mutex_base_lock(&o2.base);
mutex_release(&o2.base.dep_map, _THIS_IP_);
o2.ctx = &t2;
@@ -1908,7 +1973,7 @@ static void ww_test_edeadlk_acquire_more_slow(void)
{
int ret;
- mutex_lock(&o2.base);
+ ww_mutex_base_lock(&o2.base);
mutex_release(&o2.base.dep_map, _THIS_IP_);
o2.ctx = &t2;
@@ -1929,11 +1994,11 @@ static void ww_test_edeadlk_acquire_more_edeadlk(void)
{
int ret;
- mutex_lock(&o2.base);
+ ww_mutex_base_lock(&o2.base);
mutex_release(&o2.base.dep_map, _THIS_IP_);
o2.ctx = &t2;
- mutex_lock(&o3.base);
+ ww_mutex_base_lock(&o3.base);
mutex_release(&o3.base.dep_map, _THIS_IP_);
o3.ctx = &t2;
@@ -1955,11 +2020,11 @@ static void ww_test_edeadlk_acquire_more_edeadlk_slow(void)
{
int ret;
- mutex_lock(&o2.base);
+ ww_mutex_base_lock(&o2.base);
mutex_release(&o2.base.dep_map, _THIS_IP_);
o2.ctx = &t2;
- mutex_lock(&o3.base);
+ ww_mutex_base_lock(&o3.base);
mutex_release(&o3.base.dep_map, _THIS_IP_);
o3.ctx = &t2;
@@ -1980,7 +2045,7 @@ static void ww_test_edeadlk_acquire_wrong(void)
{
int ret;
- mutex_lock(&o2.base);
+ ww_mutex_base_lock(&o2.base);
mutex_release(&o2.base.dep_map, _THIS_IP_);
o2.ctx = &t2;
@@ -2005,7 +2070,7 @@ static void ww_test_edeadlk_acquire_wrong_slow(void)
{
int ret;
- mutex_lock(&o2.base);
+ ww_mutex_base_lock(&o2.base);
mutex_release(&o2.base.dep_map, _THIS_IP_);
o2.ctx = &t2;
@@ -2646,8 +2711,8 @@ static void wait_context_tests(void)
static void local_lock_2(void)
{
- local_lock_acquire(&local_A); /* IRQ-ON */
- local_lock_release(&local_A);
+ local_lock(&local_A); /* IRQ-ON */
+ local_unlock(&local_A);
HARDIRQ_ENTER();
spin_lock(&lock_A); /* IN-IRQ */
@@ -2656,18 +2721,18 @@ static void local_lock_2(void)
HARDIRQ_DISABLE();
spin_lock(&lock_A);
- local_lock_acquire(&local_A); /* IN-IRQ <-> IRQ-ON cycle, false */
- local_lock_release(&local_A);
+ local_lock(&local_A); /* IN-IRQ <-> IRQ-ON cycle, false */
+ local_unlock(&local_A);
spin_unlock(&lock_A);
HARDIRQ_ENABLE();
}
static void local_lock_3A(void)
{
- local_lock_acquire(&local_A); /* IRQ-ON */
+ local_lock(&local_A); /* IRQ-ON */
spin_lock(&lock_B); /* IRQ-ON */
spin_unlock(&lock_B);
- local_lock_release(&local_A);
+ local_unlock(&local_A);
HARDIRQ_ENTER();
spin_lock(&lock_A); /* IN-IRQ */
@@ -2676,18 +2741,18 @@ static void local_lock_3A(void)
HARDIRQ_DISABLE();
spin_lock(&lock_A);
- local_lock_acquire(&local_A); /* IN-IRQ <-> IRQ-ON cycle only if we count local_lock(), false */
- local_lock_release(&local_A);
+ local_lock(&local_A); /* IN-IRQ <-> IRQ-ON cycle only if we count local_lock(), false */
+ local_unlock(&local_A);
spin_unlock(&lock_A);
HARDIRQ_ENABLE();
}
static void local_lock_3B(void)
{
- local_lock_acquire(&local_A); /* IRQ-ON */
+ local_lock(&local_A); /* IRQ-ON */
spin_lock(&lock_B); /* IRQ-ON */
spin_unlock(&lock_B);
- local_lock_release(&local_A);
+ local_unlock(&local_A);
HARDIRQ_ENTER();
spin_lock(&lock_A); /* IN-IRQ */
@@ -2696,8 +2761,8 @@ static void local_lock_3B(void)
HARDIRQ_DISABLE();
spin_lock(&lock_A);
- local_lock_acquire(&local_A); /* IN-IRQ <-> IRQ-ON cycle only if we count local_lock(), false */
- local_lock_release(&local_A);
+ local_lock(&local_A); /* IN-IRQ <-> IRQ-ON cycle only if we count local_lock(), false */
+ local_unlock(&local_A);
spin_unlock(&lock_A);
HARDIRQ_ENABLE();
@@ -2812,7 +2877,7 @@ void locking_selftest(void)
printk("------------------------\n");
printk("| Locking API testsuite:\n");
printk("----------------------------------------------------------------------------\n");
- printk(" | spin |wlock |rlock |mutex | wsem | rsem |\n");
+ printk(" | spin |wlock |rlock |mutex | wsem | rsem |rtmutex\n");
printk(" --------------------------------------------------------------------------\n");
init_shared_classes();
@@ -2885,12 +2950,11 @@ void locking_selftest(void)
DO_TESTCASE_6x1RR("rlock W1R2/R2R3/W3W1", W1R2_R2R3_W3W1);
printk(" --------------------------------------------------------------------------\n");
-
/*
* irq-context testcases:
*/
DO_TESTCASE_2x6("irqs-on + irq-safe-A", irqsafe1);
- DO_TESTCASE_2x3("sirq-safe-A => hirqs-on", irqsafe2A);
+ NON_RT(DO_TESTCASE_2x3("sirq-safe-A => hirqs-on", irqsafe2A));
DO_TESTCASE_2x6("safe-A + irqs-on", irqsafe2B);
DO_TESTCASE_6x6("safe-A + unsafe-B #1", irqsafe3);
DO_TESTCASE_6x6("safe-A + unsafe-B #2", irqsafe4);
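
The dotest() changes follow from spinlock_t being an rtmutex on
PREEMPT_RT: its lock path does migrate_disable() and rcu_read_lock(),
and a deliberately failing testcase can leave those counts elevated.
The restore logic, as a sketch:

    /* Rewind whatever an expected-failure testcase left behind: */
    while (current->migration_disabled > saved_mgd_count)
            migrate_enable();
    while (current->rcu_read_lock_nesting > saved_rcu_count)
            rcu_read_unlock();
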
diff --git a/lib/nmi_backtrace.c b/lib/nmi_backtrace.c
index f9e89001b52eb..569ec86760729 100644
--- a/lib/nmi_backtrace.c
+++ b/lib/nmi_backtrace.c
@@ -93,7 +93,7 @@ bool nmi_cpu_backtrace(struct pt_regs *regs)
* Allow nested NMI backtraces while serializing
* against other CPUs.
*/
- printk_cpu_lock_irqsave(flags);
+ raw_printk_cpu_lock_irqsave(flags);
if (!READ_ONCE(backtrace_idle) && regs && cpu_in_idle(instruction_pointer(regs))) {
pr_warn("NMI backtrace for cpu %d skipped: idling at %pS\n",
cpu, (void *)instruction_pointer(regs));
@@ -104,7 +104,7 @@ bool nmi_cpu_backtrace(struct pt_regs *regs)
else
dump_stack();
}
- printk_cpu_unlock_irqrestore(flags);
+ raw_printk_cpu_unlock_irqrestore(flags);
cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
return true;
}
diff --git a/lib/ratelimit.c b/lib/ratelimit.c
index e01a93f46f833..524cf65dce53c 100644
--- a/lib/ratelimit.c
+++ b/lib/ratelimit.c
@@ -47,9 +47,7 @@ int ___ratelimit(struct ratelimit_state *rs, const char *func)
if (time_is_before_jiffies(rs->begin + rs->interval)) {
if (rs->missed) {
if (!(rs->flags & RATELIMIT_MSG_ON_RELEASE)) {
- printk_deferred(KERN_WARNING
- "%s: %d callbacks suppressed\n",
- func, rs->missed);
+ pr_warn("%s: %d callbacks suppressed\n", func, rs->missed);
rs->missed = 0;
}
}
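
printk_deferred() existed to avoid recursing into the console code from
unsafe contexts. With the printk rework in this tree that indirection
is presumably no longer needed here, so a plain pr_warn() suffices -
sketch of the resulting code:

    if (rs->missed) {
            if (!(rs->flags & RATELIMIT_MSG_ON_RELEASE)) {
                    pr_warn("%s: %d callbacks suppressed\n",
                            func, rs->missed);
                    rs->missed = 0;
            }
    }
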
diff --git a/localversion-rt b/localversion-rt
new file mode 100644
index 0000000000000..c3054d08a1129
--- /dev/null
+++ b/localversion-rt
@@ -0,0 +1 @@
+-rt2
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index d2a00ad4e1dd1..ce8dcad7639f3 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1922,11 +1922,12 @@ static void *new_vmap_block(unsigned int order, gfp_t gfp_mask)
return ERR_PTR(err);
}
- vbq = &get_cpu_var(vmap_block_queue);
+ get_cpu_light();
+ vbq = this_cpu_ptr(&vmap_block_queue);
spin_lock(&vbq->lock);
list_add_tail_rcu(&vb->free_list, &vbq->free);
spin_unlock(&vbq->lock);
- put_cpu_var(vmap_block_queue);
+ put_cpu_light();
return vaddr;
}
@@ -2005,7 +2006,8 @@ static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
order = get_order(size);
rcu_read_lock();
- vbq = &get_cpu_var(vmap_block_queue);
+ get_cpu_light();
+ vbq = this_cpu_ptr(&vmap_block_queue);
list_for_each_entry_rcu(vb, &vbq->free, free_list) {
unsigned long pages_off;
@@ -2028,7 +2030,7 @@ static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
break;
}
- put_cpu_var(vmap_block_queue);
+ put_cpu_light();
rcu_read_unlock();
/* Allocate new block if nothing was found */
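
get_cpu_var() implies preempt_disable(), and with preemption off the
spinlock_t (a sleeping lock on PREEMPT_RT) right behind it must not be
taken. get_cpu_light() only disables migration on RT, keeping the
section preemptible - the pattern as a sketch:

    struct vmap_block_queue *vbq;

    get_cpu_light();                        /* migrate_disable() on RT */
    vbq = this_cpu_ptr(&vmap_block_queue);
    spin_lock(&vbq->lock);                  /* may sleep on RT - fine */
    list_add_tail_rcu(&vb->free_list, &vbq->free);
    spin_unlock(&vbq->lock);
    put_cpu_light();
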
diff --git a/mm/workingset.c b/mm/workingset.c
index 8c03afe1d67cb..4579883eb1091 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -433,6 +433,8 @@ static struct list_lru shadow_nodes;
void workingset_update_node(struct xa_node *node)
{
+ struct address_space *mapping;
+
/*
* Track non-empty nodes that contain only shadow entries;
* unlink those that contain pages or are being freed.
@@ -441,7 +443,8 @@ void workingset_update_node(struct xa_node *node)
* already where they should be. The list_empty() test is safe
* as node->private_list is protected by the i_pages lock.
*/
- VM_WARN_ON_ONCE(!irqs_disabled()); /* For __inc_lruvec_page_state */
+ mapping = container_of(node->array, struct address_space, i_pages);
+ lockdep_assert_held(&mapping->i_pages.xa_lock);
if (node->count && node->count == node->nr_values) {
if (list_empty(&node->private_list)) {
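
On PREEMPT_RT the i_pages spinlock_t does not disable interrupts, so
asserting irqs_disabled() is the wrong invariant. Asserting the lock
that actually serialises the node works on both preemption models -
sketch:

    struct address_space *mapping;

    mapping = container_of(node->array, struct address_space, i_pages);
    lockdep_assert_held(&mapping->i_pages.xa_lock);
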
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index b897ce3b399a1..6a58c1df0cc7c 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -57,6 +57,7 @@
#include <linux/wait.h>
#include <linux/pagemap.h>
#include <linux/fs.h>
+#include <linux/local_lock.h>
#define ZSPAGE_MAGIC 0x58
@@ -77,6 +78,20 @@
#define ZS_HANDLE_SIZE (sizeof(unsigned long))
+#ifdef CONFIG_PREEMPT_RT
+
+struct zsmalloc_handle {
+ unsigned long addr;
+ spinlock_t lock;
+};
+
+#define ZS_HANDLE_ALLOC_SIZE (sizeof(struct zsmalloc_handle))
+
+#else
+
+#define ZS_HANDLE_ALLOC_SIZE (sizeof(unsigned long))
+#endif
+
/*
* Object location (<PFN>, <obj_idx>) is encoded as
* a single (unsigned long) handle value.
@@ -293,6 +308,7 @@ struct zspage {
};
struct mapping_area {
+ local_lock_t lock;
char *vm_buf; /* copy buffer for objects that span pages */
char *vm_addr; /* address of kmap_atomic()'ed pages */
enum zs_mapmode vm_mm; /* mapping mode */
@@ -322,7 +338,7 @@ static void SetZsPageMovable(struct zs_pool *pool, struct zspage *zspage) {}
static int create_cache(struct zs_pool *pool)
{
- pool->handle_cachep = kmem_cache_create("zs_handle", ZS_HANDLE_SIZE,
+ pool->handle_cachep = kmem_cache_create("zs_handle", ZS_HANDLE_ALLOC_SIZE,
0, 0, NULL);
if (!pool->handle_cachep)
return 1;
@@ -346,10 +362,27 @@ static void destroy_cache(struct zs_pool *pool)
static unsigned long cache_alloc_handle(struct zs_pool *pool, gfp_t gfp)
{
- return (unsigned long)kmem_cache_alloc(pool->handle_cachep,
- gfp & ~(__GFP_HIGHMEM|__GFP_MOVABLE));
+ void *p;
+
+ p = kmem_cache_alloc(pool->handle_cachep,
+ gfp & ~(__GFP_HIGHMEM|__GFP_MOVABLE));
+#ifdef CONFIG_PREEMPT_RT
+ if (p) {
+ struct zsmalloc_handle *zh = p;
+
+ spin_lock_init(&zh->lock);
+ }
+#endif
+ return (unsigned long)p;
}
+#ifdef CONFIG_PREEMPT_RT
+static struct zsmalloc_handle *zs_get_pure_handle(unsigned long handle)
+{
+ return (void *)(handle & ~((1 << OBJ_TAG_BITS) - 1));
+}
+#endif
+
static void cache_free_handle(struct zs_pool *pool, unsigned long handle)
{
kmem_cache_free(pool->handle_cachep, (void *)handle);
@@ -368,12 +401,18 @@ static void cache_free_zspage(struct zs_pool *pool, struct zspage *zspage)
static void record_obj(unsigned long handle, unsigned long obj)
{
+#ifdef CONFIG_PREEMPT_RT
+ struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
+
+ WRITE_ONCE(zh->addr, obj);
+#else
/*
* lsb of @obj represents handle lock while other bits
* represent object value the handle is pointing so
* updating shouldn't do store tearing.
*/
WRITE_ONCE(*(unsigned long *)handle, obj);
+#endif
}
/* zpool driver */
@@ -455,7 +494,9 @@ MODULE_ALIAS("zpool-zsmalloc");
#endif /* CONFIG_ZPOOL */
/* per-cpu VM mapping areas for zspage accesses that cross page boundaries */
-static DEFINE_PER_CPU(struct mapping_area, zs_map_area);
+static DEFINE_PER_CPU(struct mapping_area, zs_map_area) = {
+ .lock = INIT_LOCAL_LOCK(lock),
+};
static bool is_zspage_isolated(struct zspage *zspage)
{
@@ -862,7 +903,13 @@ static unsigned long location_to_obj(struct page *page, unsigned int obj_idx)
static unsigned long handle_to_obj(unsigned long handle)
{
+#ifdef CONFIG_PREEMPT_RT
+ struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
+
+ return zh->addr;
+#else
return *(unsigned long *)handle;
+#endif
}
static unsigned long obj_to_head(struct page *page, void *obj)
@@ -876,22 +923,46 @@ static unsigned long obj_to_head(struct page *page, void *obj)
static inline int testpin_tag(unsigned long handle)
{
+#ifdef CONFIG_PREEMPT_RT
+ struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
+
+ return spin_is_locked(&zh->lock);
+#else
return bit_spin_is_locked(HANDLE_PIN_BIT, (unsigned long *)handle);
+#endif
}
static inline int trypin_tag(unsigned long handle)
{
+#ifdef CONFIG_PREEMPT_RT
+ struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
+
+ return spin_trylock(&zh->lock);
+#else
return bit_spin_trylock(HANDLE_PIN_BIT, (unsigned long *)handle);
+#endif
}
static void pin_tag(unsigned long handle) __acquires(bitlock)
{
+#ifdef CONFIG_PREEMPT_RT
+ struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
+
+ return spin_lock(&zh->lock);
+#else
bit_spin_lock(HANDLE_PIN_BIT, (unsigned long *)handle);
+#endif
}
static void unpin_tag(unsigned long handle) __releases(bitlock)
{
+#ifdef CONFIG_PREEMPT_RT
+ struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
+
+ return spin_unlock(&zh->lock);
+#else
bit_spin_unlock(HANDLE_PIN_BIT, (unsigned long *)handle);
+#endif
}
static void reset_page(struct page *page)
@@ -1274,7 +1345,8 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,
class = pool->size_class[class_idx];
off = (class->size * obj_idx) & ~PAGE_MASK;
- area = &get_cpu_var(zs_map_area);
+ local_lock(&zs_map_area.lock);
+ area = this_cpu_ptr(&zs_map_area);
area->vm_mm = mm;
if (off + class->size <= PAGE_SIZE) {
/* this object is contained entirely within a page */
@@ -1328,7 +1400,7 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle)
__zs_unmap_object(area, pages, off, class->size);
}
- put_cpu_var(zs_map_area);
+ local_unlock(&zs_map_area.lock);
migrate_read_unlock(zspage);
unpin_tag(handle);
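
Two RT conversions are visible in the zsmalloc hunks: the per-CPU
mapping area gains a local_lock_t (preempt_disable() on !RT, a per-CPU
spinlock on RT), and the handle's bit spinlock - which spins with
preemption disabled and has no owner to boost - is replaced by a real
spinlock_t stored next to the handle value. The pin path, as a sketch:

    static void pin_tag(unsigned long handle)
    {
    #ifdef CONFIG_PREEMPT_RT
            struct zsmalloc_handle *zh = zs_get_pure_handle(handle);

            spin_lock(&zh->lock);           /* sleeping, PI-aware */
    #else
            bit_spin_lock(HANDLE_PIN_BIT, (unsigned long *)handle);
    #endif
    }
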
diff --git a/net/core/dev.c b/net/core/dev.c
index 15ac064b5562d..8b5cf8ad859b5 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -225,14 +225,14 @@ static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
static inline void rps_lock(struct softnet_data *sd)
{
#ifdef CONFIG_RPS
- spin_lock(&sd->input_pkt_queue.lock);
+ raw_spin_lock(&sd->input_pkt_queue.raw_lock);
#endif
}
static inline void rps_unlock(struct softnet_data *sd)
{
#ifdef CONFIG_RPS
- spin_unlock(&sd->input_pkt_queue.lock);
+ raw_spin_unlock(&sd->input_pkt_queue.raw_lock);
#endif
}
@@ -3048,6 +3048,7 @@ static void __netif_reschedule(struct Qdisc *q)
sd->output_queue_tailp = &q->next_sched;
raise_softirq_irqoff(NET_TX_SOFTIRQ);
local_irq_restore(flags);
+ preempt_check_resched_rt();
}
void __netif_schedule(struct Qdisc *q)
@@ -3110,6 +3111,7 @@ void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason)
__this_cpu_write(softnet_data.completion_queue, skb);
raise_softirq_irqoff(NET_TX_SOFTIRQ);
local_irq_restore(flags);
+ preempt_check_resched_rt();
}
EXPORT_SYMBOL(__dev_kfree_skb_irq);
@@ -3837,7 +3839,11 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
* This permits qdisc->running owner to get the lock more
* often and dequeue packets faster.
*/
+#ifdef CONFIG_PREEMPT_RT
+ contended = true;
+#else
contended = qdisc_is_running(q);
+#endif
if (unlikely(contended))
spin_lock(&q->busylock);
@@ -4668,6 +4674,7 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
rps_unlock(sd);
local_irq_restore(flags);
+ preempt_check_resched_rt();
atomic_long_inc(&skb->dev->rx_dropped);
kfree_skb(skb);
@@ -4908,7 +4915,7 @@ static int netif_rx_internal(struct sk_buff *skb)
struct rps_dev_flow voidflow, *rflow = &voidflow;
int cpu;
- preempt_disable();
+ migrate_disable();
rcu_read_lock();
cpu = get_rps_cpu(skb->dev, skb, &rflow);
@@ -4918,14 +4925,14 @@ static int netif_rx_internal(struct sk_buff *skb)
ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
rcu_read_unlock();
- preempt_enable();
+ migrate_enable();
} else
#endif
{
unsigned int qtail;
- ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
- put_cpu();
+ ret = enqueue_to_backlog(skb, get_cpu_light(), &qtail);
+ put_cpu_light();
}
return ret;
}
@@ -4964,11 +4971,9 @@ int netif_rx_ni(struct sk_buff *skb)
trace_netif_rx_ni_entry(skb);
- preempt_disable();
+ local_bh_disable();
err = netif_rx_internal(skb);
- if (local_softirq_pending())
- do_softirq();
- preempt_enable();
+ local_bh_enable();
trace_netif_rx_ni_exit(err);
return err;
@@ -6412,12 +6417,14 @@ static void net_rps_action_and_irq_enable(struct softnet_data *sd)
sd->rps_ipi_list = NULL;
local_irq_enable();
+ preempt_check_resched_rt();
/* Send pending IPI's to kick RPS processing on remote cpus. */
net_rps_send_ipi(remsd);
} else
#endif
local_irq_enable();
+ preempt_check_resched_rt();
}
static bool sd_has_rps_ipi_waiting(struct softnet_data *sd)
@@ -6495,6 +6502,7 @@ void __napi_schedule(struct napi_struct *n)
local_irq_save(flags);
____napi_schedule(this_cpu_ptr(&softnet_data), n);
local_irq_restore(flags);
+ preempt_check_resched_rt();
}
EXPORT_SYMBOL(__napi_schedule);
@@ -11326,6 +11334,7 @@ static int dev_cpu_dead(unsigned int oldcpu)
raise_softirq_irqoff(NET_TX_SOFTIRQ);
local_irq_enable();
+ preempt_check_resched_rt();
#ifdef CONFIG_RPS
remsd = oldsd->rps_ipi_list;
@@ -11339,7 +11348,7 @@ static int dev_cpu_dead(unsigned int oldcpu)
netif_rx_ni(skb);
input_queue_head_incr(oldsd);
}
- while ((skb = skb_dequeue(&oldsd->input_pkt_queue))) {
+ while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) {
netif_rx_ni(skb);
input_queue_head_incr(oldsd);
}
@@ -11655,7 +11664,7 @@ static int __init net_dev_init(void)
INIT_WORK(flush, flush_backlog);
- skb_queue_head_init(&sd->input_pkt_queue);
+ skb_queue_head_init_raw(&sd->input_pkt_queue);
skb_queue_head_init(&sd->process_queue);
#ifdef CONFIG_XFRM_OFFLOAD
skb_queue_head_init(&sd->xfrm_backlog);
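
The netif_rx_ni() change shows the general replacement for the
preempt_disable() + manual do_softirq() idiom: a BH-disabled section
is preemptible on PREEMPT_RT, and local_bh_enable() already processes
any softirq the enclosed code raised - sketch:

    local_bh_disable();
    err = netif_rx_internal(skb);
    local_bh_enable();      /* runs NET_RX_SOFTIRQ if it was raised */
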
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 75737267746f8..e460c84b1f8e4 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -637,7 +637,9 @@ int __inet_hash(struct sock *sk, struct sock *osk)
int err = 0;
if (sk->sk_state != TCP_LISTEN) {
+ local_bh_disable();
inet_ehash_nolisten(sk, osk, NULL);
+ local_bh_enable();
return 0;
}
WARN_ON(!sk_unhashed(sk));
@@ -669,11 +671,8 @@ int inet_hash(struct sock *sk)
{
int err = 0;
- if (sk->sk_state != TCP_CLOSE) {
- local_bh_disable();
+ if (sk->sk_state != TCP_CLOSE)
err = __inet_hash(sk, NULL);
- local_bh_enable();
- }
return err;
}
@@ -684,17 +683,20 @@ void inet_unhash(struct sock *sk)
struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
struct inet_listen_hashbucket *ilb = NULL;
spinlock_t *lock;
+ bool state_listen;
if (sk_unhashed(sk))
return;
if (sk->sk_state == TCP_LISTEN) {
+ state_listen = true;
ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
- lock = &ilb->lock;
+ spin_lock(&ilb->lock);
} else {
+ state_listen = false;
lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
+ spin_lock_bh(lock);
}
- spin_lock_bh(lock);
if (sk_unhashed(sk))
goto unlock;
@@ -707,7 +709,10 @@ void inet_unhash(struct sock *sk)
__sk_nulls_del_node_init_rcu(sk);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
unlock:
- spin_unlock_bh(lock);
+ if (state_listen)
+ spin_unlock(&ilb->lock);
+ else
+ spin_unlock_bh(lock);
}
EXPORT_SYMBOL_GPL(inet_unhash);
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 67c9114835c84..0a2e7f2283911 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -333,11 +333,8 @@ int inet6_hash(struct sock *sk)
{
int err = 0;
- if (sk->sk_state != TCP_CLOSE) {
- local_bh_disable();
+ if (sk->sk_state != TCP_CLOSE)
err = __inet_hash(sk, NULL);
- local_bh_enable();
- }
return err;
}
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 1e99ba1b9d723..9b20e4d6bfe4d 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -441,7 +441,7 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt)
if (test_and_set_bit(XPT_BUSY, &xprt->xpt_flags))
return;
- cpu = get_cpu();
+ cpu = get_cpu_light();
pool = svc_pool_for_cpu(xprt->xpt_server, cpu);
atomic_long_inc(&pool->sp_stats.packets);
@@ -465,7 +465,7 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt)
rqstp = NULL;
out_unlock:
rcu_read_unlock();
- put_cpu();
+ put_cpu_light();
trace_svc_xprt_do_enqueue(xprt, rqstp);
}
EXPORT_SYMBOL_GPL(svc_xprt_do_enqueue);