Subject: softirq: Prevent starvation of higher softirq vectors From: Thomas Gleixner <tglx@linutronix.de> Date: Thu, 24 Sep 2020 10:40:24 +0200 From: Thomas Gleixner <tglx@linutronix.de> The early termination of the softirq processing loop can lead to starvation of the higher numbered soft interrupt vectors because each run starts at the lowest bit. If the loop terminates then the already processed bits can be raised again before the next loop starts. If these lower bits run into the termination again, then a re-raise might starve the higher bits forever. To prevent this, store the leftovers of the previous run in the upper 16 bits of the local softirq_pending storage and ensure that these are processed before any newly raised bits are handled. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- kernel/softirq.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 50 insertions(+), 8 deletions(-) --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -259,11 +259,23 @@ static inline bool __softirq_needs_break return need_resched() || __softirq_timeout(tbreak); } +/* + * local_softirq_pending() is split into two 16 bit words. The low word + * contains the bits set by raise_softirq(), the high word contains pending + * bits which have not been processed in an early terminated run. This is + * required to prevent starvation of the higher numbered softirqs. 
+ */ +#define SIRQ_PREV_SHIFT 16 +#define SIRQ_PEND_MASK ((1U << SIRQ_PREV_SHIFT) -1) +#define SIRQ_PREV_MASK (SIRQ_PEND_MASK << SIRQ_PREV_SHIFT) +#define SIRQ_VECTOR_MASK (SIRQ_PREV_SHIFT - 1) + asmlinkage __visible void __softirq_entry __do_softirq(void) { unsigned int vec_nr, max_restart = MAX_SOFTIRQ_RESTART; u64 tbreak = sched_clock() + MAX_SOFTIRQ_TIME; unsigned long old_flags = current->flags; + u32 cur_pending, new_pending; struct softirq_action *h; unsigned long pending; bool in_hardirq; @@ -275,7 +287,7 @@ asmlinkage __visible void __softirq_entr */ current->flags &= ~PF_MEMALLOC; - pending = local_softirq_pending(); + cur_pending = local_softirq_pending(); account_irq_enter_time(current); __local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET); @@ -287,9 +299,17 @@ asmlinkage __visible void __softirq_entr local_irq_enable(); + /* + * Word swap pending to move the not yet handled bits of the previous + * run first and then clear the duplicates in the newly raised ones. + */ + swahw32s(&cur_pending); + pending = cur_pending & ~(cur_pending << SIRQ_PREV_SHIFT); + for_each_set_bit(vec_nr, &pending, NR_SOFTIRQS) { int prev_count; + vec_nr &= SIRQ_VECTOR_MASK; __clear_bit(vec_nr, &pending); kstat_incr_softirqs_this_cpu(vec_nr); @@ -312,16 +332,38 @@ asmlinkage __visible void __softirq_entr rcu_softirq_qs(); local_irq_disable(); - if (pending) { - or_softirq_pending(pending); - } else { - pending = local_softirq_pending(); - if (!pending) - goto out; + /* Check for newly raised softirqs */ + new_pending = local_softirq_pending(); + + /* All processed and no new ones pending? */ + if (!pending && !new_pending) + goto out; + + /* Did the loop process all bits or was it terminated early? */ + if (!pending) { + /* Give the new ones precedence in case of termination */ + cur_pending = new_pending <<= SIRQ_PREV_SHIFT; + /* Restart possible? 
*/ if (!__softirq_needs_break(tbreak) && --max_restart) goto restart; - } + } else { + /* + * Retain the unprocessed bits and swap @cur_pending back + * into normal ordering + */ + cur_pending = (u32)pending; + swahw32s(&cur_pending); + /* + * If the previous bits are done move the low word of + * @pending into the high word so it's processed first. + */ + if (!(cur_pending & SIRQ_PREV_MASK)) + cur_pending <<= SIRQ_PREV_SHIFT; + /* Merge the newly pending ones into the low word */ + cur_pending |= new_pending; + } + set_softirq_pending(cur_pending); wakeup_softirqd(); out: lockdep_softirq_end(in_hardirq);