Date:	Tue, 4 Aug 2009 12:25:07 +0530
From:	Krishna Kumar2 <krkumar2@...ibm.com>
To:	Jarek Poplawski <jarkao2@...il.com>
Cc:	davem@...emloft.net, herbert@...dor.apana.org.au, kaber@...sh.net,
	netdev@...r.kernel.org
Subject: Re: [RFC] [PATCH] Avoid enqueuing skb for default qdiscs

Jarek Poplawski <jarkao2@...il.com> wrote on 08/04/2009 01:08:47 AM:

Hi Jarek,

> Since this quoted part looks very similar, I'm using it here, so
> forget my note if something has changed.

Thanks for your feedback. Yes, the inline patch is the same as the
attached one.

> >> @@ -306,6 +312,9 @@ extern struct Qdisc *qdisc_create_dflt(s
> >>                     struct Qdisc_ops *ops, u32 parentid);
> >>  extern void qdisc_calculate_pkt_len(struct sk_buff *skb,
> >>                 struct qdisc_size_table *stab);
> >> +extern int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
> >> +            struct net_device *dev, struct netdev_queue *txq,
> >> +            spinlock_t *root_lock);
>
> Probably this would look better in pkt_sched.h around __qdisc_run()?

I had it originally in pkt_sched.h; for some reason I moved it to
sch_generic.h. I will move it back.

> >> +static inline int __dev_hw_xmit(struct sk_buff *skb, struct Qdisc *q,
>
> I guess, we don't touch "hw" in this function yet?

Right - this was due to the lack of a good name. Maybe I can call it
__dev_xmit_skb?

> >> -   kfree_skb(qdisc->gso_skb);
> >> -   qdisc->gso_skb = NULL;
> >> +   if (qdisc->gso_skb) {
> >> +      kfree_skb(qdisc->gso_skb);
> >> +      qdisc->gso_skb = NULL;
> >> +      qdisc->q.qlen--;
>
> You don't need this here: qdiscs should zero it in ops->reset().

I will keep it around, as you said in your other mail.
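
To spell out the invariant (this is only a sketch of the gso_skb/qlen
pairing in the hunks below, not new code): a packet parked in gso_skb
is still logically queued, so it stays counted in q.qlen until whoever
clears gso_skb drops the count again:

        /* requeue: skb parked in gso_skb stays counted in qlen */
        q->gso_skb = skb;
        q->q.qlen++;

        /* later: clearing gso_skb must drop the count again, whether
         * in dequeue_skb() or in qdisc_reset() */
        q->gso_skb = NULL;
        q->q.qlen--;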

> >> +      /* Can by-pass the queue discipline for default qdisc */
> >> +      qdisc->flags = TCQ_F_CAN_BYPASS;
>
> "|=" looks nicer to me.

Agreed - and it's required if qdisc_create_dflt (and the functions it
calls) is changed to set the flags.
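
For illustration (not part of the patch, just the usual flags idiom):

        qdisc->flags = TCQ_F_CAN_BYPASS;        /* clobbers bits set earlier */
        qdisc->flags |= TCQ_F_CAN_BYPASS;       /* preserves them */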

> >>     } else {
> >>        qdisc =  &noqueue_qdisc;
> >>     }
>
> I wonder if we shouldn't update bstats yet. Btw, try checkpatch.

The rate estimator isn't used for the default qdisc, so is anything
required? Also, checkpatch suggested changing the original code:
        "if (unlikely((skb = dequeue_skb(q)) == NULL))"
since it appears as "new code"; I hope that change is fine.
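
For reference, the transformation checkpatch asked for (both forms are
visible in the qdisc_restart() hunk below):

        /* original style - assignment inside the condition: */
        if (unlikely((skb = dequeue_skb(q)) == NULL))
                return 0;

        /* checkpatch-preferred form used in the patch: */
        skb = dequeue_skb(q);
        if (unlikely(!skb))
                return 0;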

The patch, after incorporating the review comments, is both attached
and inlined, since my mailer is still not working (I sent this mail
about an hour ago, but it didn't seem to have made it, while one of my
later mails did).

Thanks,

- KK

 (See attached file: patch4)

----------------------------------------------------------------------
 include/net/pkt_sched.h   |    3 +
 include/net/sch_generic.h |    6 ++
 net/core/dev.c            |   46 ++++++++++++-----
 net/sched/sch_generic.c   |   93 ++++++++++++++++++++++--------------
 4 files changed, 99 insertions(+), 49 deletions(-)

diff -ruNp org/include/net/pkt_sched.h new/include/net/pkt_sched.h
--- org/include/net/pkt_sched.h 2009-08-04 11:41:21.000000000 +0530
+++ new/include/net/pkt_sched.h 2009-08-04 11:42:18.000000000 +0530
@@ -87,6 +87,9 @@ extern struct qdisc_rate_table *qdisc_ge
 extern void qdisc_put_rtab(struct qdisc_rate_table *tab);
 extern void qdisc_put_stab(struct qdisc_size_table *tab);
 extern void qdisc_warn_nonwc(char *txt, struct Qdisc *qdisc);
+extern int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
+                          struct net_device *dev, struct netdev_queue *txq,
+                          spinlock_t *root_lock);

 extern void __qdisc_run(struct Qdisc *q);

diff -ruNp org/include/net/sch_generic.h new/include/net/sch_generic.h
--- org/include/net/sch_generic.h       2009-03-24 08:54:16.000000000 +0530
+++ new/include/net/sch_generic.h       2009-08-04 11:02:39.000000000 +0530
@@ -45,6 +45,7 @@ struct Qdisc
 #define TCQ_F_BUILTIN          1
 #define TCQ_F_THROTTLED                2
 #define TCQ_F_INGRESS          4
+#define TCQ_F_CAN_BYPASS       8
 #define TCQ_F_WARN_NONWC       (1 << 16)
        int                     padded;
        struct Qdisc_ops        *ops;
@@ -182,6 +183,11 @@ struct qdisc_skb_cb {
        char                    data[];
 };

+static inline int qdisc_qlen(struct Qdisc *q)
+{
+       return q->q.qlen;
+}
+
 static inline struct qdisc_skb_cb *qdisc_skb_cb(struct sk_buff *skb)
 {
        return (struct qdisc_skb_cb *)skb->cb;
diff -ruNp org/net/core/dev.c new/net/core/dev.c
--- org/net/core/dev.c  2009-07-27 09:08:24.000000000 +0530
+++ new/net/core/dev.c  2009-08-04 11:44:57.000000000 +0530
@@ -1786,6 +1786,38 @@ static struct netdev_queue *dev_pick_tx(
        return netdev_get_tx_queue(dev, queue_index);
 }

+static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
+                                struct net_device *dev,
+                                struct netdev_queue *txq)
+{
+       spinlock_t *root_lock = qdisc_lock(q);
+       int rc;
+
+       spin_lock(root_lock);
+       if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
+               kfree_skb(skb);
+               rc = NET_XMIT_DROP;
+       } else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) &&
+                  !test_and_set_bit(__QDISC_STATE_RUNNING, &q->state)) {
+               /*
+                * This is a work-conserving queue; there are no old skbs
+                * waiting to be sent out; and the qdisc is not running -
+                * xmit the skb directly.
+                */
+               if (sch_direct_xmit(skb, q, dev, txq, root_lock))
+                       __qdisc_run(q);
+               else
+                       clear_bit(__QDISC_STATE_RUNNING, &q->state);
+               rc = NET_XMIT_SUCCESS;
+       } else {
+               rc = qdisc_enqueue_root(skb, q);
+               qdisc_run(q);
+       }
+       spin_unlock(root_lock);
+
+       return rc;
+}
+
 /**
  *     dev_queue_xmit - transmit a buffer
  *     @skb: buffer to transmit
@@ -1859,19 +1891,7 @@ gso:
        skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
 #endif
        if (q->enqueue) {
-               spinlock_t *root_lock = qdisc_lock(q);
-
-               spin_lock(root_lock);
-
-               if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
-                       kfree_skb(skb);
-                       rc = NET_XMIT_DROP;
-               } else {
-                       rc = qdisc_enqueue_root(skb, q);
-                       qdisc_run(q);
-               }
-               spin_unlock(root_lock);
-
+               rc = __dev_xmit_skb(skb, q, dev, txq);
                goto out;
        }

diff -ruNp org/net/sched/sch_generic.c new/net/sched/sch_generic.c
--- org/net/sched/sch_generic.c 2009-05-25 07:48:07.000000000 +0530
+++ new/net/sched/sch_generic.c 2009-08-04 11:00:38.000000000 +0530
@@ -37,15 +37,11 @@
  * - updates to tree and tree walking are only done under the rtnl mutex.
  */

-static inline int qdisc_qlen(struct Qdisc *q)
-{
-       return q->q.qlen;
-}
-
 static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
 {
        q->gso_skb = skb;
        q->qstats.requeues++;
+       q->q.qlen++;    /* it's still part of the queue */
        __netif_schedule(q);

        return 0;
@@ -61,9 +57,11 @@ static inline struct sk_buff *dequeue_sk

                /* check the reason of requeuing without tx lock first */
                txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
-               if (!netif_tx_queue_stopped(txq) && !netif_tx_queue_frozen(txq))
+               if (!netif_tx_queue_stopped(txq) &&
+                   !netif_tx_queue_frozen(txq)) {
                        q->gso_skb = NULL;
-               else
+                       q->q.qlen--;
+               } else
                        skb = NULL;
        } else {
                skb = q->dequeue(q);
@@ -103,44 +101,23 @@ static inline int handle_dev_cpu_collisi
 }

 /*
- * NOTE: Called under qdisc_lock(q) with locally disabled BH.
- *
- * __QDISC_STATE_RUNNING guarantees only one CPU can process
- * this qdisc at a time. qdisc_lock(q) serializes queue accesses for
- * this queue.
- *
- *  netif_tx_lock serializes accesses to device driver.
- *
- *  qdisc_lock(q) and netif_tx_lock are mutually exclusive,
- *  if one is grabbed, another must be free.
- *
- * Note, that this procedure can be called by a watchdog timer
+ * Transmit one skb, and handle the return status as required. Holding the
+ * __QDISC_STATE_RUNNING bit guarantees that only one CPU can execute this
+ * function.
  *
  * Returns to the caller:
  *                             0  - queue is empty or throttled.
  *                             >0 - queue is not empty.
- *
  */
-static inline int qdisc_restart(struct Qdisc *q)
+int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
+                   struct net_device *dev, struct netdev_queue *txq,
+                   spinlock_t *root_lock)
 {
-       struct netdev_queue *txq;
        int ret = NETDEV_TX_BUSY;
-       struct net_device *dev;
-       spinlock_t *root_lock;
-       struct sk_buff *skb;
-
-       /* Dequeue packet */
-       if (unlikely((skb = dequeue_skb(q)) == NULL))
-               return 0;
-
-       root_lock = qdisc_lock(q);

        /* And release qdisc */
        spin_unlock(root_lock);

-       dev = qdisc_dev(q);
-       txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
-
        HARD_TX_LOCK(dev, txq, smp_processor_id());
        if (!netif_tx_queue_stopped(txq) &&
            !netif_tx_queue_frozen(txq))
@@ -177,6 +154,44 @@ static inline int qdisc_restart(struct Q
        return ret;
 }

+/*
+ * NOTE: Called under qdisc_lock(q) with locally disabled BH.
+ *
+ * __QDISC_STATE_RUNNING guarantees only one CPU can process
+ * this qdisc at a time. qdisc_lock(q) serializes queue accesses for
+ * this queue.
+ *
+ *  netif_tx_lock serializes accesses to device driver.
+ *
+ *  qdisc_lock(q) and netif_tx_lock are mutually exclusive,
+ *  if one is grabbed, another must be free.
+ *
+ * Note, that this procedure can be called by a watchdog timer
+ *
+ * Returns to the caller:
+ *                             0  - queue is empty or throttled.
+ *                             >0 - queue is not empty.
+ *
+ */
+static inline int qdisc_restart(struct Qdisc *q)
+{
+       struct netdev_queue *txq;
+       struct net_device *dev;
+       spinlock_t *root_lock;
+       struct sk_buff *skb;
+
+       /* Dequeue packet */
+       skb = dequeue_skb(q);
+       if (unlikely(!skb))
+               return 0;
+
+       root_lock = qdisc_lock(q);
+       dev = qdisc_dev(q);
+       txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
+
+       return sch_direct_xmit(skb, q, dev, txq, root_lock);
+}
+
 void __qdisc_run(struct Qdisc *q)
 {
        unsigned long start_time = jiffies;
@@ -547,8 +562,11 @@ void qdisc_reset(struct Qdisc *qdisc)
        if (ops->reset)
                ops->reset(qdisc);

-       kfree_skb(qdisc->gso_skb);
-       qdisc->gso_skb = NULL;
+       if (qdisc->gso_skb) {
+               kfree_skb(qdisc->gso_skb);
+               qdisc->gso_skb = NULL;
+               qdisc->q.qlen--;
+       }
 }
 EXPORT_SYMBOL(qdisc_reset);

@@ -605,6 +623,9 @@ static void attach_one_default_qdisc(str
                        printk(KERN_INFO "%s: activation failed\n", dev->name);
                        return;
                }
+
+               /* Can by-pass the queue discipline for default qdisc */
+               qdisc->flags |= TCQ_F_CAN_BYPASS;
        } else {
                qdisc =  &noqueue_qdisc;
        }
