diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c
index 637ae8f..b4c900e 100644
--- a/drivers/net/e1000/e1000_main.c
+++ b/drivers/net/e1000/e1000_main.c
@@ -154,6 +154,8 @@ static void e1000_update_phy_info(unsigned long data);
 static void e1000_watchdog(unsigned long data);
 static void e1000_82547_tx_fifo_stall(unsigned long data);
 static int e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev);
+static int e1000_prep_queue_frame(struct sk_buff *skb, struct net_device *dev);
+static int e1000_xmit_frames(struct sk_buff_head *list, struct net_device *dev);
 static struct net_device_stats * e1000_get_stats(struct net_device *netdev);
 static int e1000_change_mtu(struct net_device *netdev, int new_mtu);
 static int e1000_set_mac(struct net_device *netdev, void *p);
@@ -932,6 +934,8 @@ e1000_probe(struct pci_dev *pdev,
         netdev->open = &e1000_open;
         netdev->stop = &e1000_close;
         netdev->hard_start_xmit = &e1000_xmit_frame;
+        netdev->hard_prep_xmit = &e1000_prep_queue_frame;
+        netdev->hard_batch_xmit = &e1000_xmit_frames;
         netdev->get_stats = &e1000_get_stats;
         netdev->set_multicast_list = &e1000_set_multi;
         netdev->set_mac_address = &e1000_set_mac;
@@ -940,6 +944,7 @@ e1000_probe(struct pci_dev *pdev,
         e1000_set_ethtool_ops(netdev);
         netdev->tx_timeout = &e1000_tx_timeout;
         netdev->watchdog_timeo = 5 * HZ;
+        skb_queue_head_init(&netdev->blist);
 #ifdef CONFIG_E1000_NAPI
         netdev->poll = &e1000_clean;
         netdev->weight = 64;
@@ -998,6 +1003,7 @@ e1000_probe(struct pci_dev *pdev,
                 netdev->features |= NETIF_F_HIGHDMA;

         netdev->features |= NETIF_F_LLTX;
+        netdev->features |= NETIF_F_BTX;

         adapter->en_mng_pt = e1000_enable_mng_pass_thru(&adapter->hw);

@@ -1155,6 +1161,7 @@ e1000_probe(struct pci_dev *pdev,
         if ((err = register_netdev(netdev)))
                 goto err_register;

+        netdev->xmit_win = adapter->tx_ring->count >> 1;
         /* tell the stack to leave us alone until e1000_open() is called */
         netif_carrier_off(netdev);
         netif_stop_queue(netdev);
@@ -1449,6 +1456,7 @@ e1000_open(struct net_device *netdev)
         /* fire a link status change interrupt to start the watchdog */
         E1000_WRITE_REG(&adapter->hw, ICS, E1000_ICS_LSC);

+        printk("%s Batch window is %d\n", netdev->name, netdev->xmit_win);
         return E1000_SUCCESS;

 err_req_irq:
@@ -1503,6 +1511,7 @@ e1000_close(struct net_device *netdev)
             e1000_check_mng_mode(&adapter->hw))
                 e1000_release_hw_control(adapter);

+        skb_queue_purge(&netdev->blist);
         return 0;
 }

@@ -3098,6 +3107,18 @@ e1000_tx_map(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring,
 }

 static void
+e1000_kick_DMA(struct e1000_adapter *adapter,
+               struct e1000_tx_ring *tx_ring, int i)
+{
+        wmb();
+        writel(i, adapter->hw.hw_addr + tx_ring->tdt);
+        /* we need this if more than one processor can write to our tail
+         * at a time, it synchronizes IO on IA64/Altix systems */
+        mmiowb();
+}
+
+
+static void
 e1000_tx_queue(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring,
                int tx_flags, int count)
 {
@@ -3139,17 +3160,7 @@ e1000_tx_queue(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring,

         tx_desc->lower.data |= cpu_to_le32(adapter->txd_cmd);

-        /* Force memory writes to complete before letting h/w
-         * know there are new descriptors to fetch.  (Only
-         * applicable for weak-ordered memory model archs,
-         * such as IA-64). */
-        wmb();
-
         tx_ring->next_to_use = i;
-        writel(i, adapter->hw.hw_addr + tx_ring->tdt);
-        /* we need this if more than one processor can write to our tail
-         * at a time, it syncronizes IO on IA64/Altix systems */
-        mmiowb();
 }

 /**
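What the two hunks above do is pull the doorbell out of the per-packet path: e1000_tx_queue() now only fills descriptors and advances next_to_use, while the wmb()/writel()/mmiowb() sequence lives in e1000_kick_DMA() and can be issued once per burst. A tiny userspace sketch of that split, with invented names and a plain variable standing in for the tail register:

#include <stdio.h>

#define RING_SIZE 8

struct toy_ring {
        int desc[RING_SIZE];
        int next_to_use;        /* producer index, known only to software */
        int tail;               /* last index published to the "hardware" */
};

static void toy_queue(struct toy_ring *r, int pkt)
{
        /* fill a descriptor; no doorbell write here */
        r->desc[r->next_to_use] = pkt;
        r->next_to_use = (r->next_to_use + 1) % RING_SIZE;
}

static void toy_kick(struct toy_ring *r)
{
        /* stands in for wmb(); writel(i, ...->tdt); mmiowb(); */
        r->tail = r->next_to_use;
        printf("doorbell: tail moved to %d\n", r->tail);
}

int main(void)
{
        struct toy_ring r = { .next_to_use = 0, .tail = 0 };

        toy_queue(&r, 1);
        toy_queue(&r, 2);
        toy_queue(&r, 3);
        toy_kick(&r);   /* one tail write covers all three packets */
        return 0;
}

The single-frame path keeps the old behaviour by kicking after every packet; the batch path below defers the kick until the whole list has been queued.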
@@ -3256,54 +3267,60 @@ static int e1000_maybe_stop_tx(struct net_device *netdev,
 }

 #define TXD_USE_COUNT(S, X) (((S) >> (X)) + 1 )
+struct e1000_tx_cbdata {
+        int count;
+        unsigned int max_per_txd;
+        unsigned int nr_frags;
+        unsigned int mss;
+};
+
+#define E1000_SKB_CB(__skb) ((struct e1000_tx_cbdata *)&((__skb)->cb[0]))
+#define NETDEV_TX_DROPPED -5
+
 static int
-e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
+e1000_prep_queue_frame(struct sk_buff *skb, struct net_device *netdev)
 {
-        struct e1000_adapter *adapter = netdev_priv(netdev);
         struct e1000_tx_ring *tx_ring;
-        unsigned int first, max_per_txd = E1000_MAX_DATA_PER_TXD;
+        unsigned int f;
+        struct e1000_adapter *adapter = netdev_priv(netdev);
         unsigned int max_txd_pwr = E1000_MAX_TXD_PWR;
-        unsigned int tx_flags = 0;
         unsigned int len = skb->len;
-        unsigned long flags;
-        unsigned int nr_frags = 0;
-        unsigned int mss = 0;
-        int count = 0;
-        int tso;
-        unsigned int f;
+
+        struct e1000_tx_cbdata *cb = E1000_SKB_CB(skb);
+        cb->mss = 0;
+        cb->nr_frags = 0;
+        cb->max_per_txd = E1000_MAX_DATA_PER_TXD;
+        cb->count = 0;
+
         len -= skb->data_len;

-        /* This goes back to the question of how to logically map a tx queue
-         * to a flow.  Right now, performance is impacted slightly negatively
-         * if using multiple tx queues.  If the stack breaks away from a
-         * single qdisc implementation, we can look at this again. */
         tx_ring = adapter->tx_ring;

         if (unlikely(skb->len <= 0)) {
                 dev_kfree_skb_any(skb);
-                return NETDEV_TX_OK;
+                return NETDEV_TX_DROPPED;
         }

-        /* 82571 and newer doesn't need the workaround that limited descriptor
-         * length to 4kB */
+        /* 82571 and newer doesn't need the workaround that limited
+           descriptor length to 4kB */
         if (adapter->hw.mac_type >= e1000_82571)
-                max_per_txd = 8192;
+                cb->max_per_txd = 8192;

-        mss = skb_shinfo(skb)->gso_size;
+        cb->mss = skb_shinfo(skb)->gso_size;
         /* The controller does a simple calculation to
          * make sure there is enough room in the FIFO before
          * initiating the DMA for each buffer.  The calc is:
          * 4 = ceil(buffer len/mss).  To make sure we don't
          * overrun the FIFO, adjust the max buffer len if mss
          * drops. */
-        if (mss) {
+        if (cb->mss) {
                 uint8_t hdr_len;
-                max_per_txd = min(mss << 2, max_per_txd);
-                max_txd_pwr = fls(max_per_txd) - 1;
+                cb->max_per_txd = min(cb->mss << 2, cb->max_per_txd);
+                max_txd_pwr = fls(cb->max_per_txd) - 1;

                 /* TSO Workaround for 82571/2/3 Controllers -- if skb->data
-                 * points to just header, pull a few bytes of payload from
-                 * frags into skb->data */
+                 * points to just header, pull a few bytes of payload from
+                 * frags into skb->data */
                 hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
                 if (skb->data_len && (hdr_len == (skb->len - skb->data_len))) {
                         switch (adapter->hw.mac_type) {
@@ -3315,7 +3332,8 @@ e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
                                  * NOTE: this is a TSO only workaround
                                  * if end byte alignment not correct move us
                                  * into the next dword */
-                                if ((unsigned long)(skb_tail_pointer(skb) - 1) & 4)
+                                if ((unsigned long)(skb_tail_pointer(skb) -
+                                     1) & 4)
                                         break;
                                 /* fall through */
                         case e1000_82571:
@@ -3327,7 +3345,7 @@ e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
                                         DPRINTK(DRV, ERR,
                                                 "__pskb_pull_tail failed.\n");
                                         dev_kfree_skb_any(skb);
-                                        return NETDEV_TX_OK;
+                                        return NETDEV_TX_DROPPED;
                                 }
                                 len = skb->len - skb->data_len;
                                 break;
@@ -3339,46 +3357,56 @@ e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
         }

         /* reserve a descriptor for the offload context */
-        if ((mss) || (skb->ip_summed == CHECKSUM_PARTIAL))
-                count++;
-        count++;
+        if ((cb->mss) || (skb->ip_summed == CHECKSUM_PARTIAL))
+                cb->count++;
+        cb->count++;

         /* Controller Erratum workaround */
         if (!skb->data_len && tx_ring->last_tx_tso && !skb_is_gso(skb))
-                count++;
+                cb->count++;

-        count += TXD_USE_COUNT(len, max_txd_pwr);
+        cb->count += TXD_USE_COUNT(len, max_txd_pwr);

         if (adapter->pcix_82544)
-                count++;
+                cb->count++;

         /* work-around for errata 10 and it applies to all controllers
          * in PCI-X mode, so add one more descriptor to the count */
         if (unlikely((adapter->hw.bus_type == e1000_bus_type_pcix) &&
-                     (len > 2015)))
-                count++;
+                     (len > 2015)))
+                cb->count++;

-        nr_frags = skb_shinfo(skb)->nr_frags;
-        for (f = 0; f < nr_frags; f++)
-                count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size,
-                                       max_txd_pwr);
+        cb->nr_frags = skb_shinfo(skb)->nr_frags;
+        for (f = 0; f < cb->nr_frags; f++)
+                cb->count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size,
+                                           max_txd_pwr);
         if (adapter->pcix_82544)
-                count += nr_frags;
-
+                cb->count += cb->nr_frags;

         if (adapter->hw.tx_pkt_filtering &&
             (adapter->hw.mac_type == e1000_82573))
                 e1000_transfer_dhcp_info(adapter, skb);

-        if (!spin_trylock_irqsave(&tx_ring->tx_lock, flags))
-                /* Collision - tell upper layer to requeue */
-                return NETDEV_TX_LOCKED;
+        return NETDEV_TX_OK;
+}
+
+/* invoked under tx_ring->lock */
+static int e1000_queue_frame(struct sk_buff *skb, struct net_device *netdev)
+{
+        struct e1000_tx_ring *tx_ring;
+        int tso;
+        unsigned int first;
+        struct e1000_adapter *adapter = netdev_priv(netdev);
+        unsigned int tx_flags = 0;
+
+        struct e1000_tx_cbdata *cb = E1000_SKB_CB(skb);
+        tx_ring = adapter->tx_ring;

         /* need: count + 2 desc gap to keep tail from touching
          * head, otherwise try next time */
-        if (unlikely(e1000_maybe_stop_tx(netdev, tx_ring, count + 2))) {
-                spin_unlock_irqrestore(&tx_ring->tx_lock, flags);
+        if (unlikely(e1000_maybe_stop_tx(netdev, tx_ring, cb->count + 2))) {
+                netif_stop_queue(netdev);
                 return NETDEV_TX_BUSY;
         }

@@ -3386,7 +3414,6 @@ e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
                 if (unlikely(e1000_82547_fifo_workaround(adapter, skb))) {
                         netif_stop_queue(netdev);
                         mod_timer(&adapter->tx_fifo_stall_timer, jiffies + 1);
-                        spin_unlock_irqrestore(&tx_ring->tx_lock, flags);
                         return NETDEV_TX_BUSY;
                 }
         }
@@ -3401,8 +3428,7 @@ e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
         tso = e1000_tso(adapter, tx_ring, skb);
         if (tso < 0) {
                 dev_kfree_skb_any(skb);
-                spin_unlock_irqrestore(&tx_ring->tx_lock, flags);
-                return NETDEV_TX_OK;
+                return NETDEV_TX_DROPPED;
         }

         if (likely(tso)) {
@@ -3418,16 +3444,157 @@ e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
                 tx_flags |= E1000_TX_FLAGS_IPV4;

         e1000_tx_queue(adapter, tx_ring, tx_flags,
-                       e1000_tx_map(adapter, tx_ring, skb, first,
-                                    max_per_txd, nr_frags, mss));
+                       e1000_tx_map(adapter, tx_ring, skb, first,
+                                    cb->max_per_txd, cb->nr_frags, cb->mss));

-        netdev->trans_start = jiffies;
+        return NETDEV_TX_OK;
+}
+
+/* called with tx_ring->lock held */
+static int real_e1000_xmit_frame(struct sk_buff *skb, struct net_device *dev)
+{
+        struct e1000_adapter *adapter = netdev_priv(dev);
+        int ret = NETDEV_TX_OK;
+        struct e1000_tx_ring *tx_ring = adapter->tx_ring;
+
+        ret = e1000_queue_frame(skb, dev);
+
+        if (ret == NETDEV_TX_OK) {
+                e1000_kick_DMA(adapter, tx_ring, adapter->tx_ring->next_to_use);
+                dev->trans_start = jiffies;
+        }
+
+        if (ret == NETDEV_TX_DROPPED)
+                ret = NETDEV_TX_OK;

-        /* Make sure there is space in the ring for the next send. */
-        e1000_maybe_stop_tx(netdev, tx_ring, MAX_SKB_FRAGS + 2);
+        /* XXX: This seems so unnecessary, because if we are
+         * NETDEV_TX_BUSY already, we are already
+         * netif_queue_stopped(dev)
+         * but it's what the driver does, resolve later */
+        if (unlikely(e1000_maybe_stop_tx(dev, tx_ring, MAX_SKB_FRAGS + 2))) {
+                dev->xmit_win = 1;
+                netif_stop_queue(dev);
+                ret = NETDEV_TX_BUSY;
+        } else {
+                int rspace = E1000_DESC_UNUSED(tx_ring) - (MAX_SKB_FRAGS + 2);
+                dev->xmit_win = rspace;
+        }
+
+        if (ret == NETDEV_TX_BUSY)
+                printk("Single: %s stopped with win of %d\n",
+                       dev->name, dev->xmit_win);
+        return ret;
+}
+
+/* single frame transmitter */
+static int e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
+{
+        int ret = NETDEV_TX_OK;
+        struct e1000_adapter *adapter = netdev_priv(netdev);
+        struct e1000_tx_ring *tx_ring = adapter->tx_ring;
+        unsigned long flags;
+        struct e1000_tx_cbdata *cb;
+
+        /* hopefully we will never have cb data > 48 bytes .. */
+        memset(skb->cb, 0, sizeof(skb->cb));
+        ret = netdev->hard_prep_xmit(skb, netdev);
+        if (ret != NETDEV_TX_OK)
+                return NETDEV_TX_OK;
+
+        if (!spin_trylock_irqsave(&tx_ring->tx_lock, flags)) {
+                /* Collision - tell upper layer to requeue */
+                return NETDEV_TX_LOCKED;
+        }
+
+        cb = E1000_SKB_CB(skb);
+        /* need: count + 2 desc gap to keep tail from touching
+         * head, otherwise try next time */
+        /* XXX: This seems so unnecessary, because if we are
+         * NETDEV_TX_BUSY already, we are already
+         * netif_queue_stopped(dev)
+         * but it's what the driver does, resolve later */
+        if (unlikely(e1000_maybe_stop_tx(netdev, tx_ring, cb->count + 2))) {
+                spin_unlock_irqrestore(&tx_ring->tx_lock, flags);
+                return NETDEV_TX_BUSY;
+        }
+
+        ret = real_e1000_xmit_frame(skb, netdev);
         spin_unlock_irqrestore(&tx_ring->tx_lock, flags);
-        return NETDEV_TX_OK;
+        return ret;
+}
+
+/*
+ * Batch transmit
+ */
+static int
+e1000_xmit_frames(struct sk_buff_head *list, struct net_device *netdev)
+{
+        struct e1000_adapter *adapter = netdev->priv;
+        struct e1000_tx_ring *tx_ring = adapter->tx_ring;
+        int ret = NETDEV_TX_OK;
+        int didq = 0;
+        struct sk_buff *skb = NULL;
+        unsigned long flags;
+
+        /*
+         * we should probably wait for this lock!
+         */
+        if (!spin_trylock_irqsave(&tx_ring->tx_lock, flags)) {
+                /* Collision - tell upper layer to requeue */
+                return NETDEV_TX_LOCKED;
+        }
+
+        while ((skb = __skb_dequeue(list)) != NULL) {
+                memset(skb->cb, 0, sizeof(skb->cb)); /* remove? */
+                ret = netdev->hard_prep_xmit(skb, netdev);
+                if (ret != NETDEV_TX_OK)
+                        continue;
+
+                /* XXX: This may be an opportunity to not give nit
+                 * the packet if the dev is TX BUSY ;-> */
+                dev_do_xmit_nit(skb, netdev);
+                ret = e1000_queue_frame(skb, netdev);
+                if (ret == NETDEV_TX_OK) {
+                        didq++;
+                } else {
+                        /* should never happen, but murphy is around */
+                        if (ret == NETDEV_TX_BUSY) {
+                                __skb_queue_head(list, skb);
+                                break;
+                        }
+                }
+        }
+
+        /* we tried to send as many as we could .. */
+        if (didq) {
+                e1000_kick_DMA(adapter, tx_ring, adapter->tx_ring->next_to_use);
+                netdev->trans_start = jiffies;
+        }
+
+        if (ret == NETDEV_TX_DROPPED)
+                ret = NETDEV_TX_OK;
+
+        /* XXX: This seems so unnecessary, because if we are
+         * NETDEV_TX_BUSY already, we are already
+         * netif_queue_stopped(dev)
+         * but it's what the driver does, resolve later */
+        /* need: MAX_SKB_FRAGS + 2 desc gap to keep tail from touching
+         * head, otherwise try next time */
+        if (unlikely(e1000_maybe_stop_tx(netdev, tx_ring, MAX_SKB_FRAGS + 2))) {
+                netdev->xmit_win = 1;
+                netif_stop_queue(netdev);
+                ret = NETDEV_TX_BUSY;
+        } else {
+                int rspace = E1000_DESC_UNUSED(tx_ring) - (MAX_SKB_FRAGS + 2);
+                netdev->xmit_win = rspace;
+                printk("batch %s still awake with win of %d\n",
+                       netdev->name, netdev->xmit_win);
+        }
+        spin_unlock_irqrestore(&tx_ring->tx_lock, flags);
+        if (ret == NETDEV_TX_BUSY)
+                printk("Batch: %s stopped with win of %d\n",
+                       netdev->name, netdev->xmit_win);
+        return ret;
 }

 /**
@@ -4032,7 +4199,10 @@ e1000_clean_tx_irq(struct e1000_adapter *adapter,
                  */
                 smp_mb();
                 if (netif_queue_stopped(netdev)) {
+                        netdev->xmit_win = E1000_DESC_UNUSED(tx_ring);
                         netif_wake_queue(netdev);
+                        printk(" %s woken with win of %d\n",
+                               netdev->name, netdev->xmit_win);
                         ++adapter->restart_queue;
                 }
         }
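Summing up the e1000 side: transmission is now a prep phase (validate the skb, cache the descriptor accounting in skb->cb), a queueing phase under the TX lock, one e1000_kick_DMA() for however many frames were queued, and finally a recomputed xmit_win that tells the stack how much more it may send. A rough, userspace-only model of that flow follows; the names and the descriptor arithmetic are invented for illustration, not taken from the driver:

#include <stdio.h>

#define RING_ENTRIES  64
#define MAX_SKB_FRAGS 18

struct pkt {
        int len;
        int descs;      /* filled in by prep(), like cb->count in skb->cb */
};

static int ring_used;

/* per-packet validation and accounting, done before grabbing the TX lock */
static int prep(struct pkt *p)
{
        if (p->len <= 0)
                return -1;                      /* ~NETDEV_TX_DROPPED */
        p->descs = 1 + p->len / 4096;           /* crude stand-in for TXD_USE_COUNT */
        return 0;
}

/* queueing step, done under the TX lock in the real driver */
static int queue_one(const struct pkt *p)
{
        if (RING_ENTRIES - ring_used < p->descs + 2)
                return 1;                       /* ~NETDEV_TX_BUSY */
        ring_used += p->descs;
        return 0;
}

int main(void)
{
        struct pkt burst[] = { { 100, 0 }, { 6000, 0 }, { 0, 0 }, { 1500, 0 } };
        int i, queued = 0, win;

        for (i = 0; i < 4; i++) {
                if (prep(&burst[i]))
                        continue;               /* drop and move on */
                if (queue_one(&burst[i]))
                        break;                  /* ring full: stop, requeue the rest */
                queued++;
        }
        if (queued)
                printf("kick DMA once for %d packets\n", queued);

        win = RING_ENTRIES - ring_used - (MAX_SKB_FRAGS + 2);
        printf("advertised xmit_win = %d\n", win > 0 ? win : 1);
        return 0;
}

The single-frame e1000_xmit_frame() above is just the degenerate case of this loop with a burst of one.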
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index a2c6caa..e128ae3 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -70,6 +70,7 @@ static int debug;
 #endif

+#define NETDEV_LTT 4 /* the low threshold to open up the tx path */
 /* Network device part of the driver */
 static LIST_HEAD(tun_dev_list);

@@ -86,9 +87,56 @@ static int tun_net_open(struct net_device *dev)
 static int tun_net_close(struct net_device *dev)
 {
         netif_stop_queue(dev);
+        skb_queue_purge(&dev->blist);
         return 0;
 }

+/* Batch Net device start xmit
+ * combine with non-batching version
+ */
+static int tun_net_bxmit(struct sk_buff_head *skbs, struct net_device *dev)
+{
+        struct sk_buff *skb;
+        int didq = 0;
+        struct tun_struct *tun = netdev_priv(dev);
+        u32 qlen = skb_queue_len(&tun->readq);
+
+        /* Drop packet if interface is not attached */
+        if (!tun->attached) {
+                tun->stats.tx_dropped += skb_queue_len(&dev->blist);
+                skb_queue_purge(&dev->blist);
+                return NETDEV_TX_OK;
+        }
+
+        while (skb_queue_len(&dev->blist)) {
+                skb = __skb_dequeue(skbs);
+                if (!skb)
+                        break;
+                dev_do_xmit_nit(skb, dev);
+                skb_queue_tail(&tun->readq, skb);
+                didq++;
+        }
+
+        qlen = skb_queue_len(&tun->readq);
+        if (qlen >= dev->tx_queue_len) {
+                netif_stop_queue(dev);
+                tun->stats.tx_fifo_errors++;
+                dev->xmit_win = 1;
+        } else {
+                dev->xmit_win = dev->tx_queue_len - qlen;
+        }
+
+        if (didq)
+                dev->trans_start = jiffies;
+
+        /* Notify and wake up reader process */
+        if (tun->flags & TUN_FASYNC)
+                kill_fasync(&tun->fasync, SIGIO, POLL_IN);
+        wake_up_interruptible(&tun->read_wait);
+
+        return NETDEV_TX_OK;
+}
+
 /* Net device start xmit */
 static int tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
 {
@@ -207,6 +255,7 @@ static void tun_net_init(struct net_device *dev)
                 dev->tx_queue_len = TUN_READQ_SIZE;  /* We prefer our own queue length */
                 break;
         }
+        dev->xmit_win = dev->tx_queue_len >> 1; /* handwave, handwave */
 }

 /* Character device part */
@@ -382,7 +431,13 @@ static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv,
                         schedule();
                         continue;
                 }
-                netif_wake_queue(tun->dev);
+                {
+                        u32 t = skb_queue_len(&tun->readq);
+                        if (netif_queue_stopped(tun->dev) && t < NETDEV_LTT) {
+                                tun->dev->xmit_win = tun->dev->tx_queue_len;
+                                netif_wake_queue(tun->dev);
+                        }
+                }

                 /** Decide whether to accept this packet. This code is designed to
                  * behave identically to an Ethernet interface. Accept the packet if
@@ -429,6 +484,7 @@ static void tun_setup(struct net_device *dev)
         struct tun_struct *tun = netdev_priv(dev);

         skb_queue_head_init(&tun->readq);
+        skb_queue_head_init(&dev->blist);
         init_waitqueue_head(&tun->read_wait);

         tun->owner = -1;
@@ -436,6 +492,8 @@ static void tun_setup(struct net_device *dev)
         SET_MODULE_OWNER(dev);
         dev->open = tun_net_open;
         dev->hard_start_xmit = tun_net_xmit;
+        dev->hard_prep_xmit = NULL;
+        dev->hard_batch_xmit = tun_net_bxmit;
         dev->stop = tun_net_close;
         dev->get_stats = tun_net_stats;
         dev->ethtool_ops = &tun_ethtool_ops;
@@ -458,7 +516,7 @@ static struct tun_struct *tun_get_by_name(const char *name)
 static int tun_set_iff(struct file *file, struct ifreq *ifr)
 {
         struct tun_struct *tun;
-        struct net_device *dev;
+        struct net_device *dev = NULL;
         int err;

         tun = tun_get_by_name(ifr->ifr_name);
@@ -528,12 +586,15 @@ static int tun_set_iff(struct file *file, struct ifreq *ifr)
         }

         DBG(KERN_INFO "%s: tun_set_iff\n", tun->dev->name);
+        dev->features |= NETIF_F_BTX;

         if (ifr->ifr_flags & IFF_NO_PI)
                 tun->flags |= TUN_NO_PI;

-        if (ifr->ifr_flags & IFF_ONE_QUEUE)
+        if (ifr->ifr_flags & IFF_ONE_QUEUE) {
                 tun->flags |= TUN_ONE_QUEUE;
+                dev->features &= ~NETIF_F_BTX;
+        }

         file->private_data = tun;
         tun->attached = 1;
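The tun changes implement the same window idea with a software queue: the device stops itself when readq fills, advertises xmit_win = 1, and is only reopened by the reader once the queue drains below NETDEV_LTT, at which point the full window is advertised again. A small standalone model of that stop/wake hysteresis (invented names, plain integers instead of sk_buff queues):

#include <stdbool.h>
#include <stdio.h>

#define TX_QUEUE_LEN 10
#define LOW_THRESH    4          /* ~NETDEV_LTT */

static int readq_len;
static bool stopped;
static int xmit_win = TX_QUEUE_LEN / 2;   /* ~tun_net_init()'s handwave */

static void xmit_one(void)       /* writer side, ~tun_net_bxmit() */
{
        readq_len++;
        if (readq_len >= TX_QUEUE_LEN) {
                stopped = true;
                xmit_win = 1;
        } else {
                xmit_win = TX_QUEUE_LEN - readq_len;
        }
}

static void reader_consumes(void) /* reader side, ~tun_chr_aio_read() */
{
        if (readq_len)
                readq_len--;
        if (stopped && readq_len < LOW_THRESH) {
                stopped = false;
                xmit_win = TX_QUEUE_LEN;
        }
}

int main(void)
{
        int i;

        for (i = 0; i < TX_QUEUE_LEN; i++)
                xmit_one();
        printf("stopped=%d win=%d\n", stopped, xmit_win);

        while (stopped)
                reader_consumes();
        printf("stopped=%d win=%d qlen=%d\n", stopped, xmit_win, readq_len);
        return 0;
}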
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index f671cd2..85a1baf 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -325,6 +325,7 @@ struct net_device
 #define NETIF_F_VLAN_CHALLENGED 1024    /* Device cannot handle VLAN packets */
 #define NETIF_F_GSO             2048    /* Enable software GSO. */
 #define NETIF_F_LLTX            4096    /* LockLess TX */
+#define NETIF_F_BTX             8192    /* Capable of batch tx */

 /* Segmentation offload features */
 #define NETIF_F_GSO_SHIFT       16
@@ -450,6 +451,11 @@ struct net_device
         void                    *priv;  /* pointer to private data */
         int                     (*hard_start_xmit) (struct sk_buff *skb,
                                                     struct net_device *dev);
+        int                     (*hard_batch_xmit) (struct sk_buff_head *list,
+                                                    struct net_device *dev);
+        int                     (*hard_prep_xmit) (struct sk_buff *skb,
+                                                   struct net_device *dev);
+        int                     xmit_win;
         /* These may be needed for future network-power-down code. */
         unsigned long           trans_start;    /* Time (in jiffies) of last Tx */

@@ -466,6 +472,10 @@ struct net_device
         struct list_head        todo_list;
         /* device index hash chain */
         struct hlist_node       index_hlist;
+        /* XXX: Fix eventually to not allocate if device is not
+         * batch capable
+         */
+        struct sk_buff_head     blist;

         struct net_device       *link_watch_next;

@@ -742,7 +752,12 @@ extern int              dev_set_mac_address(struct net_device *,
                                             struct sockaddr *);
 extern int              dev_hard_start_xmit(struct sk_buff *skb,
                                             struct net_device *dev);
-
+extern int              do_gso_skb(struct sk_buff *skb,
+                                   struct sk_buff_head *skbs);
+extern int              do_possible_gso_skb(struct sk_buff *skb,
+                                            struct net_device *dev);
+extern void             dev_do_xmit_nit(struct sk_buff *skb,
+                                        struct net_device *dev);
 extern void             dev_init(void);

 extern int              netdev_budget;
diff --git a/net/core/dev.c b/net/core/dev.c
index 8301e2a..0d728cd 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1372,6 +1372,47 @@ out_kfree_skb:
         return 0;
 }

+int do_gso_skb(struct sk_buff *skb, struct sk_buff_head *skbs)
+{
+        int tdq = 0;
+        do {
+                struct sk_buff *nskb = skb->next;
+
+                skb->next = nskb->next;
+                nskb->next = NULL;
+                tdq++;
+                __skb_queue_head(skbs, skb);
+        } while (skb->next);
+        skb->destructor = DEV_GSO_CB(skb)->destructor;
+
+        return tdq;
+}
+
+int do_possible_gso_skb(struct sk_buff *skb, struct net_device *dev)
+{
+        struct sk_buff_head *skbs = &dev->blist;
+
+        if (netif_needs_gso(dev, skb)) {
+                if (unlikely(dev_gso_segment(skb))) {
+                        kfree_skb(skb);
+                        return 0;
+                }
+                if (skb->next)
+                        return do_gso_skb(skb, skbs);
+        }
+
+        __skb_queue_head(skbs, skb);
+        return 1;
+}
+
+/* invoked by the driver when batching, once it has figured the skb is sane */
+void dev_do_xmit_nit(struct sk_buff *skb, struct net_device *dev)
+{
+        if (!list_empty(&ptype_all))
+                dev_queue_xmit_nit(skb, dev);
+}
+
+
 #define HARD_TX_LOCK(dev, cpu) {                        \
         if ((dev->features & NETIF_F_LLTX) == 0) {      \
                 netif_tx_lock(dev);                     \
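do_possible_gso_skb() either puts the skb straight onto dev->blist or, when software GSO is required, segments it and moves the resulting ->next chain onto the list. The sketch below models just the unchaining step with ordinary singly linked lists; types and names are invented, and it appends in FIFO order for clarity, whereas the helpers above use __skb_queue_head():

#include <stdio.h>

struct seg {
        int id;
        struct seg *next;
};

/* move the whole ->next chain of 'head' (including head) onto a queue */
static int unchain_to_queue(struct seg *head, struct seg **q_head,
                            struct seg **q_tail)
{
        int n = 0;

        while (head) {
                struct seg *next = head->next;

                head->next = NULL;
                if (*q_tail)
                        (*q_tail)->next = head;
                else
                        *q_head = head;
                *q_tail = head;
                head = next;
                n++;
        }
        return n;
}

int main(void)
{
        struct seg s3 = { 3, NULL }, s2 = { 2, &s3 }, s1 = { 1, &s2 };
        struct seg *qh = NULL, *qt = NULL, *p;

        printf("queued %d segments:", unchain_to_queue(&s1, &qh, &qt));
        for (p = qh; p; p = p->next)
                printf(" %d", p->id);
        printf("\n");
        return 0;
}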
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 9cd3a1c..530de14 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -3217,9 +3217,7 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev)
                         pkt_dev->next_tx_us++;
                         pkt_dev->next_tx_ns -= 1000;
                 }
-        }
-
-        else {  /* Retry it next time */
+        } else {        /* netif_queue_stopped -- Retry it next time */
                 pkt_dev->last_ok = 0;
                 pkt_dev->next_tx_us = getCurUs();       /* TODO */
                 pkt_dev->next_tx_ns = 0;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index ed80054..4fe5a9b 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -85,10 +85,12 @@ static inline int
 do_dev_requeue(struct sk_buff *skb, struct net_device *dev, struct Qdisc *q)
 {
-        if (unlikely(skb->next))
-                dev->gso_skb = skb;
-        else
-                q->ops->requeue(skb, q);
+        if (skb) {
+                if (unlikely(skb->next))
+                        dev->gso_skb = skb;
+                else
+                        q->ops->requeue(skb, q);
+        }
         /* XXX: Could netif_schedule fail? Or is that fact we are
          * requeueing imply the hardware path is closed
          * and even if we fail, some interupt will wake us
@@ -116,7 +118,10 @@ tx_islocked(struct sk_buff *skb, struct net_device *dev, struct Qdisc *q)
         int ret = handle_dev_cpu_collision(dev);

         if (ret == SCHED_TX_DROP) {
-                kfree_skb(skb);
+                if (skb) /* we are not batching */
+                        kfree_skb(skb);
+                else if (!skb_queue_empty(&dev->blist))
+                        skb_queue_purge(&dev->blist);
                 return qdisc_qlen(q);
         }

@@ -195,10 +200,104 @@ static inline int qdisc_restart(struct net_device *dev)
         return do_dev_requeue(skb, dev, q);
 }

+static int try_get_tx_pkts(struct net_device *dev, struct Qdisc *q, int count)
+{
+        struct sk_buff *skb;
+        struct sk_buff_head *skbs = &dev->blist;
+        int tdq = 0;
+
+        /*
+         * very unlikely, but who knows ..
+         * If this happens we don't try to grab more pkts
+         */
+        if (!skb_queue_empty(&dev->blist))
+                return skb_queue_len(&dev->blist);
+
+        if (unlikely(dev->gso_skb)) {
+                skb = dev->gso_skb;
+                dev->gso_skb = NULL;
+                tdq = do_gso_skb(skb, skbs);
+        }
+
+        if (tdq > count)
+                return tdq; /* we will stop here */
+
+        count -= tdq;
+        while (count > 0) {
+                skb = q->dequeue(q);
+                if (!skb)
+                        break;
+
+                tdq += do_possible_gso_skb(skb, dev);
+                count -= tdq;
+        }
+
+        return tdq;
+}
+
+static inline int try_tx_pkts(struct net_device *dev)
+{
+
+        return dev->hard_batch_xmit(&dev->blist, dev);
+
+}
+
+/* same comments as in qdisc_restart apply;
+ * at some point use shared code with qdisc_restart */
+int batch_qdisc_restart(struct net_device *dev)
+{
+        struct Qdisc *q = dev->qdisc;
+        unsigned lockless = (dev->features & NETIF_F_LLTX);
+        int count = dev->xmit_win;
+        int ret = 0;
+
+        ret = try_get_tx_pkts(dev, q, count);
+
+        if (ret == 0)
+                return qdisc_qlen(q);
+
+        /* we have packets to send! */
+        if (!lockless) {
+                if (!netif_tx_trylock(dev))
+                        return tx_islocked(NULL, dev, q);
+        }
+
+        /* all clear .. */
+        spin_unlock(&dev->queue_lock);
+
+        ret = NETDEV_TX_BUSY;
+        if (!netif_queue_stopped(dev))
+                ret = try_tx_pkts(dev);
+
+        if (!lockless)
+                netif_tx_unlock(dev);
+
+        spin_lock(&dev->queue_lock);
+
+        q = dev->qdisc;
+
+        /* most likely result, packet went ok */
+        if (ret == NETDEV_TX_OK)
+                return qdisc_qlen(q);
+        /* only for lockless drivers .. */
+        if (ret == NETDEV_TX_LOCKED && lockless)
+                return tx_islocked(NULL, dev, q);
+
+        if (unlikely(ret != NETDEV_TX_BUSY && net_ratelimit()))
+                printk(KERN_WARNING " BUG %s code %d qlen %d\n",
+                       dev->name, ret, q->q.qlen);
+
+        return do_dev_requeue(NULL, dev, q);
+}
+
 void __qdisc_run(struct net_device *dev)
 {
+        unsigned batching = (dev->features & NETIF_F_BTX);
+
         do {
-                if (!qdisc_restart(dev))
+                if (!batching && !qdisc_restart(dev))
+                        break;
+                else if (!batch_qdisc_restart(dev))
                         break;
         } while (!netif_queue_stopped(dev));
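The end result in __qdisc_run() is that a NETIF_F_BTX device is driven by batch_qdisc_restart(): up to dev->xmit_win packets are pulled from the qdisc into dev->blist, and the driver's hard_batch_xmit() is then called once for the whole list. A compact userspace model of that outer loop, with invented names and no locking, GSO, or requeue handling:

#include <stdio.h>

#define QDISC_LEN 10

static int qdisc[QDISC_LEN];
static int q_head, q_len = QDISC_LEN;

static int xmit_win = 4;                     /* what the device advertised */
static int blist[QDISC_LEN];
static int blist_len;

static int try_get_pkts(int count)           /* ~try_get_tx_pkts() */
{
        while (count-- > 0 && q_len > 0) {
                blist[blist_len++] = qdisc[q_head++];
                q_len--;
        }
        return blist_len;
}

static int driver_batch_xmit(void)           /* ~dev->hard_batch_xmit() */
{
        printf("driver got %d packets in one call\n", blist_len);
        blist_len = 0;
        return 0;                            /* ~NETDEV_TX_OK */
}

int main(void)
{
        int i;

        for (i = 0; i < QDISC_LEN; i++)
                qdisc[i] = i;

        while (q_len > 0) {                  /* ~__qdisc_run() */
                if (!try_get_pkts(xmit_win))
                        break;
                driver_batch_xmit();
        }
        return 0;
}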