Date:   Tue, 15 Sep 2020 19:56:09 +0800
From:   qianjun.kernel@...il.com
To:     tglx@...utronix.de, peterz@...radead.org, will@...nel.org,
        luto@...nel.org, linux-kernel@...r.kernel.org
Cc:     laoar.shao@...il.com, qais.yousef@....com, urezki@...il.com,
        jun qian <qianjun.kernel@...il.com>
Subject: [PATCH V7 4/4] softirq: Allow breaking the softirq processing loop early

From: jun qian <qianjun.kernel@...il.com>

Allow terminating the softirq processing loop without finishing the vectors.
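
For illustration, below is a minimal standalone sketch (not part of the
patch) of how the pending bits are split around pending_next_bit. It
assumes NR_SOFTIRQS is 10, matching the example in the code comment:

#include <stdio.h>

#define NR_SOFTIRQS		10
#define SOFTIRQ_PENDING_MASK	((1UL << NR_SOFTIRQS) - 1)

int main(void)
{
	/* Pending bits 1101010110, as in the example comment. */
	unsigned long pending = 0x356;
	/* The previous run broke out right after handling bit 4. */
	int next_bit = 5;

	/* Bits >= next_bit: left over from the broken run, served first. */
	unsigned long pending_left =
		pending & (SOFTIRQ_PENDING_MASK << next_bit);
	/* Bits < next_bit: raised again in the meantime, served afterwards. */
	unsigned long pending_again =
		pending & (SOFTIRQ_PENDING_MASK >> (NR_SOFTIRQS - next_bit));

	printf("pending_left  = %#lx\n", pending_left);  /* 0x340 = 1101000000 */
	printf("pending_again = %#lx\n", pending_again); /* 0x16  = 0000010110 */
	return 0;
}

Running it prints pending_left = 0x340 and pending_again = 0x16, i.e. the
high bits left over from the broken run are served first and the re-raised
low bits afterwards.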

Signed-off-by: jun qian <qianjun.kernel@...il.com>
---
 kernel/softirq.c | 113 ++++++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 91 insertions(+), 22 deletions(-)

diff --git a/kernel/softirq.c b/kernel/softirq.c
index cbb59b5..29cf079 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -254,6 +254,22 @@ static inline bool __softirq_needs_break(u64 start)
 	return false;
 }
 
+#define SOFTIRQ_PENDING_MASK ((1UL << NR_SOFTIRQS) - 1)
+
+/*
+ * pending_next_bit records where processing should resume when the
+ * loop breaks before all vectors have been handled. This per-CPU
+ * variable avoids the following starvation scenario:
+ * Assume bit 0 and 1 are pending when the processing starts. Now it
+ * breaks out after bit 0 has been handled and stores back bit 1 as
+ * pending. Before ksoftirqd runs bit 0 gets raised again. ksoftirqd
+ * runs and handles bit 0, which takes more than the timeout. As a
+ * result the bit 0 processing can starve all other softirqs.
+ *
+ * Hence pending_next_bit preserves the processing order across runs.
+ */
+DEFINE_PER_CPU(u32, pending_next_bit);
+
 asmlinkage __visible void __softirq_entry __do_softirq(void)
 {
 	u64 start = sched_clock();
@@ -261,8 +277,11 @@ asmlinkage __visible void __softirq_entry __do_softirq(void)
 	unsigned int max_restart = MAX_SOFTIRQ_RESTART;
 	struct softirq_action *h;
 	unsigned long pending;
+	unsigned long pending_left, pending_again;
 	unsigned int vec_nr;
 	bool in_hardirq;
+	int next_bit;
+	unsigned long flags;
 
 	/*
 	 * Mask out PF_MEMALLOC as the current task context is borrowed for the
@@ -283,25 +302,66 @@ asmlinkage __visible void __softirq_entry __do_softirq(void)
 
 	local_irq_enable();
 
-	for_each_set_bit(vec_nr, &pending, NR_SOFTIRQS) {
-		int prev_count;
-
-		__clear_bit(vec_nr, &pending);
-
-		h = softirq_vec + vec_nr;
-
-		prev_count = preempt_count();
-
-		kstat_incr_softirqs_this_cpu(vec_nr);
+	/*
+	 * pending_left holds the bits left unhandled when the loop was
+	 * broken before finishing the vectors; they are handled first in
+	 * this run. pending_again holds the bits that were raised again
+	 * in the meantime; they are handled after the pending_left bits
+	 * have been handled.
+	 *
+	 * For example:
+	 * If the pending bits are 1101010110 and the loop was broken after
+	 * bit 4 was handled, then pending_next_bit is 5, pending_left is
+	 * 1101000000 and pending_again is 0000010110.
+	 */
+	next_bit = __this_cpu_read(pending_next_bit);
+	pending_left = pending &
+		(SOFTIRQ_PENDING_MASK << next_bit);
+	pending_again = pending &
+		(SOFTIRQ_PENDING_MASK >> (NR_SOFTIRQS - next_bit));
+
+	while (pending_left || pending_again) {
+		if (pending_left) {
+			pending = pending_left;
+			pending_left = 0;
+		} else if (pending_again) {
+			pending = pending_again;
+			pending_again = 0;
+		} else
+			break;
+		for_each_set_bit(vec_nr, &pending, NR_SOFTIRQS) {
+			int prev_count;
+
+			__clear_bit(vec_nr, &pending);
+
+			h = softirq_vec + vec_nr;
+
+			prev_count = preempt_count();
+
+			kstat_incr_softirqs_this_cpu(vec_nr);
+
+			trace_softirq_entry(vec_nr);
+			h->action(h);
+			trace_softirq_exit(vec_nr);
+			if (unlikely(prev_count != preempt_count())) {
+				pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n",
+				       vec_nr, softirq_to_name[vec_nr], h->action,
+				       prev_count, preempt_count());
+				preempt_count_set(prev_count);
+			}
 
-		trace_softirq_entry(vec_nr);
-		h->action(h);
-		trace_softirq_exit(vec_nr);
-		if (unlikely(prev_count != preempt_count())) {
-			pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n",
-			       vec_nr, softirq_to_name[vec_nr], h->action,
-			       prev_count, preempt_count());
-			preempt_count_set(prev_count);
+			/* Allow an early break to avoid long scheduling delays */
+			if (pending && __softirq_needs_break(start)) {
+				__this_cpu_write(pending_next_bit, vec_nr + 1);
+				/*
+				 * Ensure that the remaining pending bits will be
+				 * handled on the next run.
+				 */
+				local_irq_save(flags);
+				or_softirq_pending(pending | pending_again);
+				local_irq_restore(flags);
+				break;
+			}
 		}
 	}
 
@@ -309,12 +369,21 @@ asmlinkage __visible void __softirq_entry __do_softirq(void)
 		rcu_softirq_qs();
 	local_irq_disable();
 
-	pending = local_softirq_pending();
-	if (pending) {
-		if (!__softirq_needs_break(start) && --max_restart)
-			goto restart;
+	/* get the unhandled bits */
+	pending |= pending_again;
+	if (!pending)
+		/*
+		 * If all of the pending bits have been handled,
+		 * reset the pending_next_bit to 0.
+		 */
+		__this_cpu_write(pending_next_bit, 0);
 
+	if (pending)
 		wakeup_softirqd();
+	else if (!__softirq_needs_break(start) && --max_restart) {
+		pending = local_softirq_pending();
+		if (pending)
+			goto restart;
 	}
 
 	lockdep_softirq_end(in_hardirq);
-- 
1.8.3.1
