netdev - [net-next PATCH 12/15] net: sched: lockless support for netif

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives

Hash Suite: Windows password security audit tool. GUI, reports in PDF.

[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]

Date:   Tue, 23 Aug 2016 13:27:46 -0700
From:   John Fastabend <john.fastabend@...il.com>
To:     eric.dumazet@...il.com, jhs@...atatu.com, davem@...emloft.net,
        brouer@...hat.com, xiyou.wangcong@...il.com,
        alexei.starovoitov@...il.com
Cc:     john.r.fastabend@...el.com, netdev@...r.kernel.org,
        john.fastabend@...il.com
Subject: [net-next PATCH 12/15] net: sched: lockless support for
 netif_schedule

netif_schedule uses a bit QDISC_STATE_SCHED to tell the qdisc layer
if a run of the qdisc has been scheduler. This is important when
tearing down qdisc instances. We can not rcu_free an instance for
example if its possible that we might have outstanding references to
it.

Perhaps more importantly in the per cpu lockless case we need to
schedule a run of the qdisc on all qdiscs that are enqueu'ing packets
and hitting the gso_skb requeue logic or else the skb may get stuck
on the gso_skb queue without anything to finish the xmit.

This patch uses a reference counter instead of a bit to account for
the multiple CPUs.

Signed-off-by: John Fastabend <john.r.fastabend@...el.com>
---
 include/net/sch_generic.h |    1 +
 net/core/dev.c            |   32 +++++++++++++++++++++++---------
 net/sched/sch_api.c       |    5 +++++
 net/sched/sch_generic.c   |   15 ++++++++++++++-
 4 files changed, 43 insertions(+), 10 deletions(-)

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 12df73d..cf4b3ea 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -93,6 +93,7 @@ struct Qdisc {
 	seqcount_t		running;
 	struct gnet_stats_queue	qstats;
 	unsigned long		state;
+	unsigned long __percpu	*cpu_state;
 	struct Qdisc            *next_sched;
 	struct sk_buff		*skb_bad_txq;
 	struct rcu_head		rcu_head;
diff --git a/net/core/dev.c b/net/core/dev.c
index a0effa6..f638571 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2272,8 +2272,14 @@ static void __netif_reschedule(struct Qdisc *q)
 
 void __netif_schedule(struct Qdisc *q)
 {
-	if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state))
+	if (q->flags & TCQ_F_NOLOCK) {
+		unsigned long *s = this_cpu_ptr(q->cpu_state);
+
+		if (!test_and_set_bit(__QDISC_STATE_SCHED, s))
+			__netif_reschedule(q);
+	} else if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state)) {
 		__netif_reschedule(q);
+	}
 }
 EXPORT_SYMBOL(__netif_schedule);
 
@@ -3920,15 +3926,23 @@ static void net_tx_action(struct softirq_action *h)
 			if (!(q->flags & TCQ_F_NOLOCK)) {
 				root_lock = qdisc_lock(q);
 				spin_lock(root_lock);
-			}
-			/* We need to make sure head->next_sched is read
-			 * before clearing __QDISC_STATE_SCHED
-			 */
-			smp_mb__before_atomic();
-			clear_bit(__QDISC_STATE_SCHED, &q->state);
-			qdisc_run(q);
-			if (!(q->flags & TCQ_F_NOLOCK))
+
+				/* We need to make sure head->next_sched is read
+				 * before clearing __QDISC_STATE_SCHED
+				 */
+				smp_mb__before_atomic();
+				clear_bit(__QDISC_STATE_SCHED, &q->state);
+
+				qdisc_run(q);
+
 				spin_unlock(root_lock);
+			} else {
+				unsigned long *s = this_cpu_ptr(q->cpu_state);
+
+				smp_mb__before_atomic();
+				clear_bit(__QDISC_STATE_SCHED, s);
+				__qdisc_run(q);
+			}
 		}
 	}
 }
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 6c5bf13..89989a6 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -975,6 +975,10 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
 				alloc_percpu(struct bad_txq_cell);
 			if (!sch->skb_bad_txq_cpu)
 				goto err_out4;
+
+			sch->cpu_state = alloc_percpu(unsigned long);
+			if (!sch->cpu_state)
+				goto err_out4;
 		}
 
 		if (tca[TCA_STAB]) {
@@ -1027,6 +1031,7 @@ err_out4:
 	free_percpu(sch->cpu_qstats);
 	free_percpu(sch->gso_cpu_skb);
 	free_percpu(sch->skb_bad_txq_cpu);
+	free_percpu(sch->cpu_state);
 	/*
 	 * Any broken qdiscs that would require a ops->reset() here?
 	 * The qdisc was never in action so it shouldn't be necessary.
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 0b61b14..a6ec01c 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -769,6 +769,10 @@ struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue,
 		sch->skb_bad_txq_cpu = alloc_percpu(struct bad_txq_cell);
 		if (!sch->skb_bad_txq_cpu)
 			goto errout;
+
+		sch->cpu_state = alloc_percpu(unsigned long);
+		if (!sch->cpu_state)
+			goto errout;
 	}
 
 	return sch;
@@ -1038,7 +1042,16 @@ static bool some_qdisc_is_busy(struct net_device *dev)
 		q = dev_queue->qdisc_sleeping;
 
 		if (q->flags & TCQ_F_NOLOCK) {
-			val = test_bit(__QDISC_STATE_SCHED, &q->state);
+			int i;
+
+			for_each_possible_cpu(i) {
+				unsigned long *s;
+
+				s = per_cpu_ptr(q->cpu_state, i);
+				val = test_bit(__QDISC_STATE_SCHED, s);
+				if (val)
+					break;
+			}
 		} else {
 			root_lock = qdisc_lock(q);
 			spin_lock_bh(root_lock);