Message-Id: <20080817.155723.214209606.davem@davemloft.net>
Date:	Sun, 17 Aug 2008 15:57:23 -0700 (PDT)
From:	David Miller <davem@...emloft.net>
To:	jarkao2@...il.com
Cc:	herbert@...dor.apana.org.au, netdev@...r.kernel.org
Subject: Re: [PATCH] pkt_sched: Destroy gen estimators under rtnl_lock().

From: Jarek Poplawski <jarkao2@...il.com>
Date: Thu, 14 Aug 2008 08:52:50 +0000

> On the other hand... such a flag would be probably for one thing only.
> And if we would have a "netif_scheduled_without_xmitting" counter this
> could probably make 2 in 1?

Here is how I propose to plug this hole.  The issue is that there is
this potential gap where both the __QDISC_STATE_RUNNING and
__QDISC_STATE_SCHED bits are clear while there is still a context
that can reschedule the qdisc.
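
To spell out the interleaving I'm worried about (just a sketch against
the current net_tx_action() and dev_deactivate(), not literal code):

	CPU A: net_tx_action()              CPU B: dev_deactivate()
	----------------------              -----------------------
	clear_bit(__QDISC_STATE_SCHED)
	                                    some_qdisc_is_busy() takes the
	                                    root lock, sees RUNNING and
	                                    SCHED both clear -> "idle"
	spin_trylock(root_lock) fails
	(B still holds the root lock)
	                                    dev_deactivate() returns
	__netif_schedule(q)
	  -> the qdisc gets rescheduled
	     after the device has been
	     deactivated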

And I'm pretty sure the most desirable fix is to get rid of that gap
if it can easily be done.

It seems like it can be.  What we do in the patch below is something
like this:

1) The one code path that can leave both bits clear and then reschedule
   is changed so that this state is only ever visible while holding the
   root qdisc's lock.

   All other code paths will not reschedule once they clear the bits.

2) dev_deactivate() unconditionally takes the root qdisc spinlock
   and just waits for both bits to clear.  This is now entirely
   sufficient to ensure that no pending runs of the qdisc remain.
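
Concretely, the trylock-failure path in net_tx_action() becomes the only
place that can still reschedule with the intent pending, and it no longer
clears SCHED beforehand.  Abridged from the patch below, with comments
added to spell out the reasoning:

	root_lock = qdisc_lock(q);
	if (spin_trylock(root_lock)) {
		/* SCHED is cleared only while the root lock is held,
		 * so the locked RUNNING/SCHED test in dev_deactivate()
		 * can never observe "both bits clear" while a
		 * reschedule from this context is still pending.
		 */
		clear_bit(__QDISC_STATE_SCHED, &q->state);
		qdisc_run(q);
		spin_unlock(root_lock);
	} else {
		/* SCHED stays set here, so dev_deactivate() keeps
		 * waiting until this deferred run actually happens.
		 */
		__qdisc_schedule(q);
	}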

Signed-off-by: David S. Miller <davem@...emloft.net>

diff --git a/net/core/dev.c b/net/core/dev.c
index 600bb23..896393d 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1338,20 +1338,27 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 	rcu_read_unlock();
 }
 
+/* We own the __QDISC_STATE_SCHED state bit, and know that
+ * we are the only entity which can schedule this qdisc on
+ * the output queue.
+ */
+static void __qdisc_schedule(struct Qdisc *q)
+{
+	struct softnet_data *sd;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	sd = &__get_cpu_var(softnet_data);
+	q->next_sched = sd->output_queue;
+	sd->output_queue = q;
+	raise_softirq_irqoff(NET_TX_SOFTIRQ);
+	local_irq_restore(flags);
+}
 
 void __netif_schedule(struct Qdisc *q)
 {
-	if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state)) {
-		struct softnet_data *sd;
-		unsigned long flags;
-
-		local_irq_save(flags);
-		sd = &__get_cpu_var(softnet_data);
-		q->next_sched = sd->output_queue;
-		sd->output_queue = q;
-		raise_softirq_irqoff(NET_TX_SOFTIRQ);
-		local_irq_restore(flags);
-	}
+	if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state))
+		__qdisc_schedule(q);
 }
 EXPORT_SYMBOL(__netif_schedule);
 
@@ -1974,15 +1981,13 @@ static void net_tx_action(struct softirq_action *h)
 
 			head = head->next_sched;
 
-			smp_mb__before_clear_bit();
-			clear_bit(__QDISC_STATE_SCHED, &q->state);
-
 			root_lock = qdisc_lock(q);
 			if (spin_trylock(root_lock)) {
+				clear_bit(__QDISC_STATE_SCHED, &q->state);
 				qdisc_run(q);
 				spin_unlock(root_lock);
 			} else {
-				__netif_schedule(q);
+				__qdisc_schedule(q);
 			}
 		}
 	}
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 4685746..d6ac170 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -647,7 +647,7 @@ static void dev_deactivate_queue(struct net_device *dev,
 	}
 }
 
-static bool some_qdisc_is_busy(struct net_device *dev, int lock)
+static bool some_qdisc_is_busy(struct net_device *dev)
 {
 	unsigned int i;
 
@@ -661,14 +661,12 @@ static bool some_qdisc_is_busy(struct net_device *dev, int lock)
 		q = dev_queue->qdisc_sleeping;
 		root_lock = qdisc_lock(q);
 
-		if (lock)
-			spin_lock_bh(root_lock);
+		spin_lock_bh(root_lock);
 
 		val = (test_bit(__QDISC_STATE_RUNNING, &q->state) ||
 		       test_bit(__QDISC_STATE_SCHED, &q->state));
 
-		if (lock)
-			spin_unlock_bh(root_lock);
+		spin_unlock_bh(root_lock);
 
 		if (val)
 			return true;
@@ -689,25 +687,8 @@ void dev_deactivate(struct net_device *dev)
 	synchronize_rcu();
 
 	/* Wait for outstanding qdisc_run calls. */
-	do {
-		while (some_qdisc_is_busy(dev, 0))
-			yield();
-
-		/*
-		 * Double-check inside queue lock to ensure that all effects
-		 * of the queue run are visible when we return.
-		 */
-		running = some_qdisc_is_busy(dev, 1);
-
-		/*
-		 * The running flag should never be set at this point because
-		 * we've already set dev->qdisc to noop_qdisc *inside* the same
-		 * pair of spin locks.  That is, if any qdisc_run starts after
-		 * our initial test it should see the noop_qdisc and then
-		 * clear the RUNNING bit before dropping the queue lock.  So
-		 * if it is set here then we've found a bug.
-		 */
-	} while (WARN_ON_ONCE(running));
+	while (some_qdisc_is_busy(dev))
+		yield();
 }
 
 static void dev_init_scheduler_queue(struct net_device *dev,