Date: Fri, 14 Sep 2007 14:31:56 +0530
From: Krishna Kumar <krkumar2@...ibm.com>
To: johnpol@....mipt.ru, herbert@...dor.apana.org.au, hadi@...erus.ca,
	kaber@...sh.net, shemminger@...ux-foundation.org, davem@...emloft.net
Cc: jagana@...ibm.com, Robert.Olsson@...a.slu.se,
	peter.p.waskiewicz.jr@...el.com, xma@...ibm.com, gaagaan@...il.com,
	kumarkr@...ux.ibm.com, rdreier@...co.com, rick.jones2@...com,
	mcarlson@...adcom.com, jeff@...zik.org, mchan@...adcom.com,
	general@...ts.openfabrics.org, netdev@...r.kernel.org, tgraf@...g.ch,
	randy.dunlap@...cle.com, Krishna Kumar <krkumar2@...ibm.com>, sri@...ibm.com
Subject: [PATCH 3/10 REV5] [sched] Modify qdisc_run to support batching

Modify qdisc_run() to support batching: callers of qdisc_run() now pass in
a batch list, and qdisc_restart() is modified to implement batching.

Signed-off-by: Krishna Kumar <krkumar2@...ibm.com>
---
 include/linux/netdevice.h |    2 
 include/net/pkt_sched.h   |   17 +++++--
 net/core/dev.c            |   45 ++++++++++++++++++
 net/sched/sch_generic.c   |  109 ++++++++++++++++++++++++++++++++++++----------
 4 files changed, 145 insertions(+), 28 deletions(-)

diff -ruNp org/include/net/pkt_sched.h new/include/net/pkt_sched.h
--- org/include/net/pkt_sched.h	2007-09-13 09:11:09.000000000 +0530
+++ new/include/net/pkt_sched.h	2007-09-14 10:25:36.000000000 +0530
@@ -80,13 +80,24 @@ extern struct qdisc_rate_table *qdisc_ge
 						   struct rtattr *tab);
 extern void qdisc_put_rtab(struct qdisc_rate_table *tab);
 
-extern void __qdisc_run(struct net_device *dev);
+static inline void qdisc_block(struct net_device *dev)
+{
+	while (test_and_set_bit(__LINK_STATE_QDISC_RUNNING, &dev->state))
+		yield();
+}
+
+static inline void qdisc_unblock(struct net_device *dev)
+{
+	clear_bit(__LINK_STATE_QDISC_RUNNING, &dev->state);
+}
+
+extern void __qdisc_run(struct net_device *dev, struct sk_buff_head *blist);
 
-static inline void qdisc_run(struct net_device *dev)
+static inline void qdisc_run(struct net_device *dev, struct sk_buff_head *blist)
 {
 	if (!netif_queue_stopped(dev) &&
 	    !test_and_set_bit(__LINK_STATE_QDISC_RUNNING, &dev->state))
-		__qdisc_run(dev);
+		__qdisc_run(dev, blist);
 }
 
 extern int tc_classify_compat(struct sk_buff *skb, struct tcf_proto *tp,
diff -ruNp org/include/linux/netdevice.h new/include/linux/netdevice.h
--- org/include/linux/netdevice.h	2007-09-13 09:11:09.000000000 +0530
+++ new/include/linux/netdevice.h	2007-09-14 10:26:21.000000000 +0530
@@ -1013,6 +1013,8 @@ extern int		dev_set_mac_address(struct n
 					    struct sockaddr *);
 extern int		dev_hard_start_xmit(struct sk_buff *skb,
 					    struct net_device *dev);
+extern int		dev_add_skb_to_blist(struct sk_buff *skb,
+					    struct net_device *dev);
 
 extern int		netdev_budget;
 
diff -ruNp org/net/sched/sch_generic.c new/net/sched/sch_generic.c
--- org/net/sched/sch_generic.c	2007-09-13 09:11:10.000000000 +0530
+++ new/net/sched/sch_generic.c	2007-09-14 10:25:36.000000000 +0530
@@ -59,26 +59,30 @@ static inline int qdisc_qlen(struct Qdis
 static inline int dev_requeue_skb(struct sk_buff *skb, struct net_device *dev,
 				  struct Qdisc *q)
 {
-	if (unlikely(skb->next))
-		dev->gso_skb = skb;
-	else
-		q->ops->requeue(skb, q);
+	if (skb) {
+		if (unlikely(skb->next))
+			dev->gso_skb = skb;
+		else
+			q->ops->requeue(skb, q);
+	}
 
 	netif_schedule(dev);
 	return 0;
 }
 
-static inline struct sk_buff *dev_dequeue_skb(struct net_device *dev,
-					      struct Qdisc *q)
+static inline int dev_requeue_skb_wrapper(struct sk_buff *skb,
+					  struct net_device *dev,
+					  struct Qdisc *q)
 {
-	struct sk_buff *skb;
-
-	if ((skb = dev->gso_skb))
-		dev->gso_skb = NULL;
-	else
-		skb = q->dequeue(q);
+	if (dev->skb_blist) {
+		/*
+		 * In case of tx full, batching drivers would have put all
+		 * skbs into skb_blist so there is no skb to requeue.
+		 */
+		skb = NULL;
+	}
 
-	return skb;
+	return dev_requeue_skb(skb, dev, q);
 }
 
 static inline int handle_dev_cpu_collision(struct sk_buff *skb,
@@ -91,10 +95,15 @@ static inline int handle_dev_cpu_collisi
 	/*
 	 * Same CPU holding the lock. It may be a transient
 	 * configuration error, when hard_start_xmit() recurses. We
-	 * detect it by checking xmit owner and drop the packet when
-	 * deadloop is detected. Return OK to try the next skb.
+	 * detect it by checking xmit owner and drop the packet (or
+	 * all packets in batching case) when deadloop is detected.
+	 * Return OK to try the next skb.
 	 */
-	kfree_skb(skb);
+	if (likely(skb))
+		kfree_skb(skb);
+	else if (!skb_queue_empty(dev->skb_blist))
+		skb_queue_purge(dev->skb_blist);
+
 	if (net_ratelimit())
 		printk(KERN_WARNING "Dead loop on netdevice %s, "
 		       "fix it urgently!\n", dev->name);
@@ -111,6 +120,53 @@ static inline int handle_dev_cpu_collisi
 	return ret;
 }
 
+#define DEQUEUE_SKB(q)		(q->dequeue(q))
+
+static inline struct sk_buff *get_gso_skb(struct net_device *dev)
+{
+	struct sk_buff *skb;
+
+	if ((skb = dev->gso_skb))
+		dev->gso_skb = NULL;
+
+	return skb;
+}
+
+/*
+ * Algorithm to get skb(s) is:
+ *	- If gso skb present, return it.
+ *	- Non batching drivers, or if the batch list is empty and there is
+ *	  1 skb in the queue - dequeue skb and put it in *skbp to tell the
+ *	  caller to use the single xmit API.
+ *	- Batching drivers where the batch list already contains atleast one
+ *	  skb, or if there are multiple skbs in the queue: keep dequeue'ing
+ *	  skb's upto a limit and set *skbp to NULL to tell the caller to use
+ *	  the multiple xmit API.
+ *
+ * Returns:
+ *	1 - atleast one skb is to be sent out, *skbp contains skb or NULL
+ *	    (in case >1 skbs present in blist for batching)
+ *	0 - no skbs to be sent.
+ */
+static inline int get_skb(struct net_device *dev, struct Qdisc *q,
+			  struct sk_buff_head *blist, struct sk_buff **skbp)
+{
+	if ((*skbp = get_gso_skb(dev)) != NULL)
+		return 1;
+
+	if (!blist || (!skb_queue_len(blist) && qdisc_qlen(q) <= 1)) {
+		return likely((*skbp = DEQUEUE_SKB(q)) != NULL);
+	} else {
+		struct sk_buff *skb;
+		int max = dev->tx_queue_len - skb_queue_len(blist);
+
+		while (max > 0 && (skb = DEQUEUE_SKB(q)) != NULL)
+			max -= dev_add_skb_to_blist(skb, dev);
+
+		return 1;	/* there is atleast one skb in skb_blist */
+	}
+}
+
 /*
  * NOTE: Called under dev->queue_lock with locally disabled BH.
  *
@@ -130,7 +186,8 @@ static inline int handle_dev_cpu_collisi
  *				>0 - queue is not empty.
  *
  */
-static inline int qdisc_restart(struct net_device *dev)
+static inline int qdisc_restart(struct net_device *dev,
+				struct sk_buff_head *blist)
 {
 	struct Qdisc *q = dev->qdisc;
 	struct sk_buff *skb;
@@ -138,7 +195,7 @@ static inline int qdisc_restart(struct n
 	int ret;
 
 	/* Dequeue packet */
-	if (unlikely((skb = dev_dequeue_skb(dev, q)) == NULL))
+	if (unlikely(get_skb(dev, q, blist, &skb) == 0))
 		return 0;
 
 	/*
@@ -168,7 +225,7 @@ static inline int qdisc_restart(struct n
 
 	switch (ret) {
 	case NETDEV_TX_OK:
-		/* Driver sent out skb successfully */
+		/* Driver sent out skb (or entire skb_blist) successfully */
 		ret = qdisc_qlen(q);
 		break;
 
@@ -183,21 +240,21 @@ static inline int qdisc_restart(struct n
 			printk(KERN_WARNING "BUG %s code %d qlen %d\n",
 			       dev->name, ret, q->q.qlen);
 
-		ret = dev_requeue_skb(skb, dev, q);
+		ret = dev_requeue_skb_wrapper(skb, dev, q);
 		break;
 	}
 
 	return ret;
 }
 
-void __qdisc_run(struct net_device *dev)
+void __qdisc_run(struct net_device *dev, struct sk_buff_head *blist)
 {
 	do {
-		if (!qdisc_restart(dev))
+		if (!qdisc_restart(dev, blist))
 			break;
 	} while (!netif_queue_stopped(dev));
 
-	clear_bit(__LINK_STATE_QDISC_RUNNING, &dev->state);
+	qdisc_unblock(dev);
 }
 
 static void dev_watchdog(unsigned long arg)
@@ -575,6 +632,12 @@ void dev_deactivate(struct net_device *d
 	qdisc = dev->qdisc;
 	dev->qdisc = &noop_qdisc;
 
+	if (dev->skb_blist) {
+		/* Release skbs on batch list */
+		if (!skb_queue_empty(dev->skb_blist))
+			skb_queue_purge(dev->skb_blist);
+	}
+
 	qdisc_reset(qdisc);
 
 	skb = dev->gso_skb;
diff -ruNp org/net/core/dev.c new/net/core/dev.c
--- org/net/core/dev.c	2007-09-14 10:24:27.000000000 +0530
+++ new/net/core/dev.c	2007-09-14 10:25:36.000000000 +0530
@@ -1542,6 +1542,46 @@ static int dev_gso_segment(struct sk_buf
 	return 0;
 }
 
+/*
+ * Add skb (skbs in case segmentation is required) to dev->skb_blist. No one
+ * can add to this list simultaneously since we are holding QDISC RUNNING
+ * bit. Also list is safe from simultaneous deletes too since skbs are
+ * dequeued only when the driver is invoked.
+ *
+ * Returns count of successful skb(s) added to skb_blist.
+ */
+int dev_add_skb_to_blist(struct sk_buff *skb, struct net_device *dev)
+{
+	if (!list_empty(&ptype_all))
+		dev_queue_xmit_nit(skb, dev);
+
+	if (netif_needs_gso(dev, skb)) {
+		if (unlikely(dev_gso_segment(skb))) {
+			kfree_skb(skb);
+			return 0;
+		}
+
+		if (skb->next) {
+			int count = 0;
+
+			do {
+				struct sk_buff *nskb = skb->next;
+
+				skb->next = nskb->next;
+				__skb_queue_tail(dev->skb_blist, nskb);
+				count++;
+			} while (skb->next);
+
+			/* Reset destructor for kfree_skb to work */
+			skb->destructor = DEV_GSO_CB(skb)->destructor;
+			kfree_skb(skb);
+			return count;
+		}
+	}
+	__skb_queue_tail(dev->skb_blist, skb);
+	return 1;
+}
+
 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	if (likely(skb)) {
@@ -1697,7 +1737,7 @@ gso:
 			/* reset queue_mapping to zero */
 			skb->queue_mapping = 0;
 			rc = q->enqueue(skb, q);
-			qdisc_run(dev);
+			qdisc_run(dev, NULL);
 			spin_unlock(&dev->queue_lock);
 
 			rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
@@ -1895,7 +1935,8 @@ static void net_tx_action(struct softirq
 			clear_bit(__LINK_STATE_SCHED, &dev->state);
 
 			if (spin_trylock(&dev->queue_lock)) {
-				qdisc_run(dev);
+				/* Send all skbs if driver supports batching */
+				qdisc_run(dev, dev->skb_blist);
 				spin_unlock(&dev->queue_lock);
 			} else {
 				netif_schedule(dev);
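
For readers following the batching logic, the small stand-alone C sketch below
models only the single-vs-batch decision made by get_skb() in the patch above.
It is an illustration, not kernel code: fake_dev, fake_qdisc, choose_xmit_path
and their counters are invented for this sketch, and the gso_skb shortcut and
real skb handling are left out. It builds with a plain C compiler and prints
which xmit path would be taken for two simple queue states.

	#include <stdio.h>

	struct fake_dev {
		int tx_queue_len;	/* like dev->tx_queue_len */
		int blist_len;		/* like skb_queue_len(dev->skb_blist) */
		int has_blist;		/* non-zero if the driver supports batching */
	};

	struct fake_qdisc {
		int qlen;		/* like qdisc_qlen(q) */
	};

	/*
	 * Mirrors the blist/qlen tests in get_skb(): returns 0 when nothing is
	 * queued, 1 for the single-skb xmit path, 2 for the batch path (queued
	 * packets are moved onto the batch list).
	 */
	static int choose_xmit_path(struct fake_dev *dev, struct fake_qdisc *q)
	{
		if (!dev->has_blist || (dev->blist_len == 0 && q->qlen <= 1)) {
			if (q->qlen == 0)
				return 0;
			q->qlen--;		/* "dequeue" one packet */
			return 1;
		}

		/* Batch path: move up to (tx_queue_len - blist_len) packets */
		{
			int max = dev->tx_queue_len - dev->blist_len;

			while (max > 0 && q->qlen > 0) {
				q->qlen--;
				dev->blist_len++;
				max--;
			}
		}
		return 2;
	}

	int main(void)
	{
		struct fake_dev dev = { .tx_queue_len = 1000, .blist_len = 0,
					.has_blist = 1 };
		struct fake_qdisc q = { .qlen = 5 };
		int path;

		/* 5 queued packets on a batching device -> batch path */
		path = choose_xmit_path(&dev, &q);
		printf("path=%d blist_len=%d qlen=%d\n", path, dev.blist_len, q.qlen);

		/* 1 queued packet, empty batch list -> ordinary single-skb path */
		dev.blist_len = 0;
		q.qlen = 1;
		path = choose_xmit_path(&dev, &q);
		printf("path=%d blist_len=%d qlen=%d\n", path, dev.blist_len, q.qlen);

		return 0;
	}

The bound used in the batch branch is the same one the patch uses
(dev->tx_queue_len minus the current batch-list length), so a single pass can
never pull more packets onto the batch list than the device queue length
allows.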