Subject: softirq: resurrect softirq threads
From: Mike Galbraith
Date: Mon Jan  6 08:42:11 CET 2014

Some loads cannot tolerate the jitter induced by all softirqs being
processed at the same priority.  Let the user prioritize them again.

Signed-off-by: Mike Galbraith
---
 Documentation/kernel-parameters.txt |    3 
 include/linux/interrupt.h           |    9 -
 include/linux/sched.h               |    6 +
 kernel/sched/cputime.c              |    4 
 kernel/softirq.c                    |  182 +++++++++++++++++++++++++++++++-----
 5 files changed, 173 insertions(+), 31 deletions(-)

--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -3086,6 +3086,9 @@ bytes respectively. Such letter suffixes
 			Force threading of all interrupt handlers except those
 			marked explicitly IRQF_NO_THREAD.
 
+	threadsirqs	[KNL]
+			Enable or disable threading of all softirqs for -rt.
+
 	tmem		[KNL,XEN]
 			Enable the Transcendent memory driver if built-in.
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -383,8 +383,10 @@ struct softirq_action
 asmlinkage void do_softirq(void);
 asmlinkage void __do_softirq(void);
 static inline void thread_do_softirq(void) { do_softirq(); }
+#define NR_SOFTIRQ_THREADS 1
 #else
 extern void thread_do_softirq(void);
+#define NR_SOFTIRQ_THREADS NR_SOFTIRQS
 #endif
 
 extern void open_softirq(int nr, void (*action)(struct softirq_action *));
@@ -405,12 +407,7 @@ extern void softirq_check_pending_idle(v
  */
 DECLARE_PER_CPU(struct list_head [NR_SOFTIRQS], softirq_work_list);
 
-DECLARE_PER_CPU(struct task_struct *, ksoftirqd);
-
-static inline struct task_struct *this_cpu_ksoftirqd(void)
-{
-	return this_cpu_read(ksoftirqd);
-}
+DECLARE_PER_CPU(struct task_struct * [NR_SOFTIRQ_THREADS], ksoftirqd);
 
 /* Try to send a softirq to a remote cpu. If this cannot be done, the
  * work will be queued to the local cpu.
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1121,6 +1121,7 @@ struct task_struct {
 	/* Revert to default priority/policy when forking */
 	unsigned sched_reset_on_fork:1;
 	unsigned sched_contributes_to_load:1;
+	unsigned sched_is_softirqd:1;
 
 	pid_t pid;
 	pid_t tgid;
@@ -1484,6 +1485,11 @@ static inline struct pid *task_tgid(stru
 	return task->group_leader->pids[PIDTYPE_PID].pid;
 }
 
+static inline bool task_is_softirqd(struct task_struct *task)
+{
+	return task->sched_is_softirqd;
+}
+
 /*
  * Without tasklist or rcu lock it is not safe to dereference
  * the result of task_pgrp/task_session even if task == current,
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -68,7 +68,7 @@ void irqtime_account_irq(struct task_str
 	 */
 	if (hardirq_count())
 		__this_cpu_add(cpu_hardirq_time, delta);
-	else if (in_serving_softirq() && curr != this_cpu_ksoftirqd())
+	else if (in_serving_softirq() && !task_is_softirqd(curr))
 		__this_cpu_add(cpu_softirq_time, delta);
 
 	irq_time_write_end();
@@ -338,7 +338,7 @@ static void irqtime_account_process_tick
 		cpustat[CPUTIME_IRQ] += (__force u64) cputime_one_jiffy;
 	} else if (irqtime_account_si_update()) {
 		cpustat[CPUTIME_SOFTIRQ] += (__force u64) cputime_one_jiffy;
-	} else if (this_cpu_ksoftirqd() == p) {
+	} else if (task_is_softirqd(p)) {
 		/*
 		 * ksoftirqd time do not get accounted in cpu_softirq_time.
 		 * So, we have to handle it separately here.
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -57,7 +57,14 @@ EXPORT_SYMBOL(irq_stat);
 static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;
 
-DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
+DEFINE_PER_CPU(struct task_struct * [NR_SOFTIRQ_THREADS], ksoftirqd);
+
+static unsigned int __read_mostly threadsirqs;
+
+static struct task_struct *__this_cpu_ksoftirqd(int nr)
+{
+	return __this_cpu_read(ksoftirqd[nr && threadsirqs ? nr : 0]);
+}
 
 char *softirq_to_name[NR_SOFTIRQS] = {
 	"HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL",
 	"TASKLET", "SCHED", "HRTIMER", "RCU"
@@ -162,10 +169,10 @@ static inline void softirq_clr_runner(un
  * to the pending events, so lets the scheduler to balance
  * the softirq load for us.
  */
-static void wakeup_softirqd(void)
+static void wakeup_softirqd(int nr)
 {
 	/* Interrupts are disabled: no need to stop preemption */
-	struct task_struct *tsk = __this_cpu_read(ksoftirqd);
+	struct task_struct *tsk = __this_cpu_ksoftirqd(nr);
 
 	if (tsk && tsk->state != TASK_RUNNING)
 		wake_up_process(tsk);
@@ -426,7 +433,7 @@ asmlinkage void __do_softirq(void)
 		    --max_restart)
 			goto restart;
 
-		wakeup_softirqd();
+		wakeup_softirqd(0);
 	}
 
 	lockdep_softirq_end();
@@ -474,7 +481,7 @@ void raise_softirq_irqoff(unsigned int n
 	 * schedule the softirq soon.
 	 */
 	if (!in_interrupt())
-		wakeup_softirqd();
+		wakeup_softirqd(0);
 }
 
 void __raise_softirq_irqoff(unsigned int nr)
@@ -485,8 +492,18 @@ void __raise_softirq_irqoff(unsigned int
 static inline void local_bh_disable_nort(void) { local_bh_disable(); }
 static inline void _local_bh_enable_nort(void) { _local_bh_enable(); }
-static void ksoftirqd_set_sched_params(unsigned int cpu) { }
-static void ksoftirqd_clr_sched_params(unsigned int cpu, bool online) { }
+static void ksoftirqd_set_sched_params(unsigned int cpu)
+{
+	local_irq_disable();
+	current->sched_is_softirqd = 1;
+	local_irq_enable();
+}
+static void ksoftirqd_clr_sched_params(unsigned int cpu, bool online)
+{
+	local_irq_disable();
+	current->sched_is_softirqd = 0;
+	local_irq_enable();
+}
 
 #else /* !PREEMPT_RT_FULL */
@@ -647,15 +664,15 @@ static void do_raise_softirq_irqoff(unsi
 	 */
 	if (!in_irq() && current->softirq_nestcnt)
 		current->softirqs_raised |= (1U << nr);
-	else if (__this_cpu_read(ksoftirqd))
-		__this_cpu_read(ksoftirqd)->softirqs_raised |= (1U << nr);
+	else if (__this_cpu_ksoftirqd(nr))
+		__this_cpu_ksoftirqd(nr)->softirqs_raised |= (1U << nr);
 }
 
 void __raise_softirq_irqoff(unsigned int nr)
 {
 	do_raise_softirq_irqoff(nr);
 	if (!in_irq() && !current->softirq_nestcnt)
-		wakeup_softirqd();
+		wakeup_softirqd(nr);
 }
 
 /*
@@ -682,7 +699,7 @@ void raise_softirq_irqoff(unsigned int n
 	 * raise a WARN() if the condition is met.
 	 */
 	if (!current->softirq_nestcnt)
-		wakeup_softirqd();
+		wakeup_softirqd(nr);
 }
 
 static inline int ksoftirqd_softirq_pending(void)
@@ -700,6 +717,7 @@ static inline void ksoftirqd_set_sched_p
 	sched_setscheduler(current, SCHED_FIFO, &param);
 	/* Take over all pending softirqs when starting */
 	local_irq_disable();
+	current->sched_is_softirqd = 1;
 	current->softirqs_raised = local_softirq_pending();
 	local_irq_enable();
 }
@@ -708,9 +726,26 @@ static inline void ksoftirqd_clr_sched_p
 {
 	struct sched_param param = { .sched_priority = 0 };
 
+	local_irq_disable();
+	current->sched_is_softirqd = 0;
+	current->softirqs_raised = 0;
+	local_irq_enable();
 	sched_setscheduler(current, SCHED_NORMAL, &param);
 }
 
+static int __init threadsoftirqs(char *str)
+{
+	int thread = 0;
+
+	if (!get_option(&str, &thread))
+		thread = 1;
+
+	threadsirqs = !!thread;
+
+	return 0;
+}
+
+early_param("threadsirqs", threadsoftirqs);
+
 #endif /* PREEMPT_RT_FULL */
 
 /*
  * Enter an interrupt context.
@@ -748,15 +783,25 @@ static inline void invoke_softirq(void)
 		 */
 		do_softirq();
 	} else {
-		wakeup_softirqd();
+		wakeup_softirqd(0);
 	}
 #else /* PREEMPT_RT_FULL */
+	struct task_struct *tsk;
 	unsigned long flags;
+	u32 pending, nr;
 
 	local_irq_save(flags);
-	if (__this_cpu_read(ksoftirqd) &&
-	    __this_cpu_read(ksoftirqd)->softirqs_raised)
-		wakeup_softirqd();
+	pending = local_softirq_pending();
+
+	while (pending) {
+		nr = __ffs(pending);
+		tsk = __this_cpu_ksoftirqd(nr);
+		if (tsk && tsk->softirqs_raised)
+			wakeup_softirqd(nr);
+		if (!threadsirqs)
+			break;
+		pending &= ~(1U << nr);
+	}
 	local_irq_restore(flags);
 #endif
 }
@@ -1328,20 +1373,111 @@ static struct notifier_block cpu_nfb = {
 	.notifier_call = cpu_callback
 };
 
-static struct smp_hotplug_thread softirq_threads = {
-	.store			= &ksoftirqd,
-	.setup			= ksoftirqd_set_sched_params,
-	.cleanup		= ksoftirqd_clr_sched_params,
-	.thread_should_run	= ksoftirqd_should_run,
-	.thread_fn		= run_ksoftirqd,
-	.thread_comm		= "ksoftirqd/%u",
+static struct smp_hotplug_thread softirq_threads[] = {
+	{
+		.store			= &ksoftirqd[0],
+		.setup			= ksoftirqd_set_sched_params,
+		.cleanup		= ksoftirqd_clr_sched_params,
+		.thread_should_run	= ksoftirqd_should_run,
+		.thread_fn		= run_ksoftirqd,
+		.thread_comm		= "ksoftirqd/%u",
+	},
+#ifdef CONFIG_PREEMPT_RT_FULL
+	{
+		.store			= &ksoftirqd[HI_SOFTIRQ],
+		.setup			= ksoftirqd_set_sched_params,
+		.cleanup		= ksoftirqd_clr_sched_params,
+		.thread_should_run	= ksoftirqd_should_run,
+		.thread_fn		= run_ksoftirqd,
+		.thread_comm		= "sirq-high/%u",
+	},
+	{
+		.store			= &ksoftirqd[TIMER_SOFTIRQ],
+		.setup			= ksoftirqd_set_sched_params,
+		.cleanup		= ksoftirqd_clr_sched_params,
+		.thread_should_run	= ksoftirqd_should_run,
+		.thread_fn		= run_ksoftirqd,
+		.thread_comm		= "sirq-timer/%u",
+	},
+	{
+		.store			= &ksoftirqd[NET_TX_SOFTIRQ],
+		.setup			= ksoftirqd_set_sched_params,
+		.cleanup		= ksoftirqd_clr_sched_params,
+		.thread_should_run	= ksoftirqd_should_run,
+		.thread_fn		= run_ksoftirqd,
+		.thread_comm		= "sirq-net-tx/%u",
+	},
+	{
+		.store			= &ksoftirqd[NET_RX_SOFTIRQ],
+		.setup			= ksoftirqd_set_sched_params,
+		.cleanup		= ksoftirqd_clr_sched_params,
+		.thread_should_run	= ksoftirqd_should_run,
+		.thread_fn		= run_ksoftirqd,
+		.thread_comm		= "sirq-net-rx/%u",
+	},
+	{
+		.store			= &ksoftirqd[BLOCK_SOFTIRQ],
+		.setup			= ksoftirqd_set_sched_params,
+		.cleanup		= ksoftirqd_clr_sched_params,
+		.thread_should_run	= ksoftirqd_should_run,
+		.thread_fn		= run_ksoftirqd,
+		.thread_comm		= "sirq-blk/%u",
+	},
+	{
+		.store			= &ksoftirqd[BLOCK_IOPOLL_SOFTIRQ],
+		.setup			= ksoftirqd_set_sched_params,
+		.cleanup		= ksoftirqd_clr_sched_params,
+		.thread_should_run	= ksoftirqd_should_run,
+		.thread_fn		= run_ksoftirqd,
+		.thread_comm		= "sirq-blk-pol/%u",
+	},
+	{
+		.store			= &ksoftirqd[TASKLET_SOFTIRQ],
+		.setup			= ksoftirqd_set_sched_params,
+		.cleanup		= ksoftirqd_clr_sched_params,
+		.thread_should_run	= ksoftirqd_should_run,
+		.thread_fn		= run_ksoftirqd,
+		.thread_comm		= "sirq-tasklet/%u",
+	},
+	{
+		.store			= &ksoftirqd[SCHED_SOFTIRQ],
+		.setup			= ksoftirqd_set_sched_params,
+		.cleanup		= ksoftirqd_clr_sched_params,
+		.thread_should_run	= ksoftirqd_should_run,
+		.thread_fn		= run_ksoftirqd,
+		.thread_comm		= "sirq-sched/%u",
+	},
+	{
+		.store			= &ksoftirqd[HRTIMER_SOFTIRQ],
+		.setup			= ksoftirqd_set_sched_params,
+		.cleanup		= ksoftirqd_clr_sched_params,
+		.thread_should_run	= ksoftirqd_should_run,
+		.thread_fn		= run_ksoftirqd,
+		.thread_comm		= "sirq-hrtimer/%u",
+	},
+	{
+		.store			= &ksoftirqd[RCU_SOFTIRQ],
+		.setup			= ksoftirqd_set_sched_params,
+		.cleanup		= ksoftirqd_clr_sched_params,
+		.thread_should_run	= ksoftirqd_should_run,
+		.thread_fn		= run_ksoftirqd,
+		.thread_comm		= "sirq-rcu/%u",
+	},
+#endif
 };
 
 static __init int spawn_ksoftirqd(void)
 {
+	struct smp_hotplug_thread *t = &softirq_threads[threadsirqs];
+	int i, threads = NR_SOFTIRQ_THREADS;
+
 	register_cpu_notifier(&cpu_nfb);
-	BUG_ON(smpboot_register_percpu_thread(&softirq_threads));
+	for (i = 0; i < threads; i++, t++) {
+		BUG_ON(smpboot_register_percpu_thread(t));
+		if (!threadsirqs)
+			break;
+	}
 
 	return 0;
 }
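
For illustration only (not part of the patch above): once the kernel is
booted with "threadsirqs" and the per-softirq threads (sirq-high/N,
sirq-timer/N, sirq-net-rx/N, ...) exist, each of them can be given its own
real-time priority from user space through the ordinary sched_setscheduler()
interface, just like any other kernel thread.  A minimal user-space sketch
(file name and helper are hypothetical; the thread must be looked up first,
e.g. via ps):

	/*
	 * sirq-prio.c - hedged sketch, not part of the patch: put one of the
	 * per-softirq threads (e.g. sirq-net-rx/0) at a chosen SCHED_FIFO
	 * priority.  Equivalent to "chrt -f -p <prio> <pid>".
	 */
	#include <sys/types.h>
	#include <sched.h>
	#include <stdio.h>
	#include <stdlib.h>

	int main(int argc, char **argv)
	{
		struct sched_param sp;
		pid_t pid;

		if (argc != 3) {
			fprintf(stderr, "usage: %s <pid-of-sirq-thread> <fifo-prio>\n",
				argv[0]);
			return 1;
		}

		pid = (pid_t)atoi(argv[1]);
		sp.sched_priority = atoi(argv[2]);

		/* Switch the given thread to SCHED_FIFO at the requested priority. */
		if (sched_setscheduler(pid, SCHED_FIFO, &sp)) {
			perror("sched_setscheduler");
			return 1;
		}
		return 0;
	}

For example, "./sirq-prio <pid of sirq-net-rx/0> 60" would run the CPU0
network-receive softirq thread at FIFO:60 while leaving the other softirq
threads at their default priority.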