[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <1335522889.2775.231.camel@edumazet-glaptop>
Date: Fri, 27 Apr 2012 12:34:49 +0200
From: Eric Dumazet <eric.dumazet@...il.com>
To: David Miller <davem@...emloft.net>
Cc: netdev <netdev@...r.kernel.org>,
Ilpo Järvinen <ilpo.jarvinen@...sinki.fi>,
Tom Herbert <therbert@...gle.com>,
Neal Cardwell <ncardwell@...gle.com>,
Ben Hutchings <bhutchings@...arflare.com>,
Matt Carlson <mcarlson@...adcom.com>,
Jeff Kirsher <jeffrey.t.kirsher@...el.com>,
Michael Chan <mchan@...adcom.com>,
Herbert Xu <herbert@...dor.apana.org.au>,
Maciej Żenczykowski <maze@...gle.com>
Subject: [PATCH 2/4 net-next] tg3: provide frags as skb head
From: Eric Dumazet <edumazet@...gle.com>
This patch converts the tg3 driver, one of our reference drivers, to use the
new build_skb() API in frag mode.
Instead of using kmalloc() to allocate the memory block that will be
used by build_skb() as skb->head, we use a page fragment.
This is a followup to the patch "net: allow skb->head to be a page fragment".
This allows GRO, TCP coalescing, and splice() to be more efficient.
Incidentally, this also removes SLUB slow path contention in kfree().
Signed-off-by: Eric Dumazet <edumazet@...gle.com>
Cc: Ilpo Järvinen <ilpo.jarvinen@...sinki.fi>
Cc: Herbert Xu <herbert@...dor.apana.org.au>
Cc: Maciej Żenczykowski <maze@...gle.com>
Cc: Neal Cardwell <ncardwell@...gle.com>
Cc: Tom Herbert <therbert@...gle.com>
Cc: Jeff Kirsher <jeffrey.t.kirsher@...el.com>
Cc: Ben Hutchings <bhutchings@...arflare.com>
Cc: Matt Carlson <mcarlson@...adcom.com>
Cc: Michael Chan <mchan@...adcom.com>
---
drivers/net/ethernet/broadcom/tg3.c | 74 ++++++++++++++++++++++----
drivers/net/ethernet/broadcom/tg3.h | 2
2 files changed, 66 insertions(+), 10 deletions(-)
diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index d481b0a..c1a580e 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -195,6 +195,15 @@ static inline void _tg3_flag_clear(enum TG3_FLAGS flag, unsigned long *bits)
#define TG3_RX_OFFSET(tp) (NET_SKB_PAD)
#endif
+/* This driver uses the new build_skb() API providing a frag as skb->head.
+ * This strategy permits better GRO aggregation, better TCP coalescing, and
+ * better splice() implementation (avoids a copy from head to a page), at
+ * minimal memory cost.
+ * In this 2048 bytes block, we have enough room to store the MTU=1500 frame
+ * and the struct skb_shared_info.
+ */
+#define TG3_FRAGSIZE 2048
+
/* minimum number of free TX descriptors required to wake up TX process */
#define TG3_TX_WAKEUP_THRESH(tnapi) ((tnapi)->tx_pending / 4)
#define TG3_TX_BD_DMA_MAX_2K 2048
@@ -5617,17 +5626,48 @@ static void tg3_tx(struct tg3_napi *tnapi)
}
}
+static void *tg3_frag_alloc(struct tg3_rx_prodring_set *tpr)
+{
+ void *data;
+
+ if (tpr->rx_page_size < TG3_FRAGSIZE) {
+ struct page *page = alloc_page(GFP_ATOMIC);
+
+ if (!page)
+ return NULL;
+ atomic_add((PAGE_SIZE / TG3_FRAGSIZE) - 1, &page->_count);
+ tpr->rx_page_addr = page_address(page);
+ tpr->rx_page_size = PAGE_SIZE;
+ }
+ data = tpr->rx_page_addr;
+ tpr->rx_page_addr += TG3_FRAGSIZE;
+ tpr->rx_page_size -= TG3_FRAGSIZE;
+ return data;
+}
+
+static void tg3_frag_free(bool is_frag, void *data)
+{
+ if (is_frag)
+ put_page(virt_to_head_page(data));
+ else
+ kfree(data);
+}
+
static void tg3_rx_data_free(struct tg3 *tp, struct ring_info *ri, u32 map_sz)
{
+ unsigned int skb_size = SKB_DATA_ALIGN(map_sz + TG3_RX_OFFSET(tp)) +
+ SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+
if (!ri->data)
return;
pci_unmap_single(tp->pdev, dma_unmap_addr(ri, mapping),
map_sz, PCI_DMA_FROMDEVICE);
- kfree(ri->data);
+ tg3_frag_free(skb_size <= TG3_FRAGSIZE, ri->data);
ri->data = NULL;
}
+
/* Returns size of skb allocated or < 0 on error.
*
* We only need to fill in the address because the other members
@@ -5640,7 +5680,8 @@ static void tg3_rx_data_free(struct tg3 *tp, struct ring_info *ri, u32 map_sz)
* (to fetch the error flags, vlan tag, checksum, and opaque cookie).
*/
static int tg3_alloc_rx_data(struct tg3 *tp, struct tg3_rx_prodring_set *tpr,
- u32 opaque_key, u32 dest_idx_unmasked)
+ u32 opaque_key, u32 dest_idx_unmasked,
+ unsigned int *frag_size)
{
struct tg3_rx_buffer_desc *desc;
struct ring_info *map;
@@ -5675,7 +5716,13 @@ static int tg3_alloc_rx_data(struct tg3 *tp, struct tg3_rx_prodring_set *tpr,
*/
skb_size = SKB_DATA_ALIGN(data_size + TG3_RX_OFFSET(tp)) +
SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
- data = kmalloc(skb_size, GFP_ATOMIC);
+ if (skb_size <= TG3_FRAGSIZE) {
+ data = tg3_frag_alloc(tpr);
+ *frag_size = TG3_FRAGSIZE;
+ } else {
+ data = kmalloc(skb_size, GFP_ATOMIC);
+ *frag_size = 0;
+ }
if (!data)
return -ENOMEM;
@@ -5683,8 +5730,8 @@ static int tg3_alloc_rx_data(struct tg3 *tp, struct tg3_rx_prodring_set *tpr,
data + TG3_RX_OFFSET(tp),
data_size,
PCI_DMA_FROMDEVICE);
- if (pci_dma_mapping_error(tp->pdev, mapping)) {
- kfree(data);
+ if (unlikely(pci_dma_mapping_error(tp->pdev, mapping))) {
+ tg3_frag_free(skb_size <= TG3_FRAGSIZE, data);
return -EIO;
}
@@ -5835,18 +5882,19 @@ static int tg3_rx(struct tg3_napi *tnapi, int budget)
if (len > TG3_RX_COPY_THRESH(tp)) {
int skb_size;
+ unsigned int frag_size;
skb_size = tg3_alloc_rx_data(tp, tpr, opaque_key,
- *post_ptr);
+ *post_ptr, &frag_size);
if (skb_size < 0)
goto drop_it;
pci_unmap_single(tp->pdev, dma_addr, skb_size,
PCI_DMA_FROMDEVICE);
- skb = build_skb(data, 0);
+ skb = build_skb(data, frag_size);
if (!skb) {
- kfree(data);
+ tg3_frag_free(frag_size != 0, data);
goto drop_it_no_recycle;
}
skb_reserve(skb, TG3_RX_OFFSET(tp));
@@ -7279,7 +7327,10 @@ static int tg3_rx_prodring_alloc(struct tg3 *tp,
/* Now allocate fresh SKBs for each rx ring. */
for (i = 0; i < tp->rx_pending; i++) {
- if (tg3_alloc_rx_data(tp, tpr, RXD_OPAQUE_RING_STD, i) < 0) {
+ unsigned int frag_size;
+
+ if (tg3_alloc_rx_data(tp, tpr, RXD_OPAQUE_RING_STD, i,
+ &frag_size) < 0) {
netdev_warn(tp->dev,
"Using a smaller RX standard ring. Only "
"%d out of %d buffers were allocated "
@@ -7311,7 +7362,10 @@ static int tg3_rx_prodring_alloc(struct tg3 *tp,
}
for (i = 0; i < tp->rx_jumbo_pending; i++) {
- if (tg3_alloc_rx_data(tp, tpr, RXD_OPAQUE_RING_JUMBO, i) < 0) {
+ unsigned int frag_size;
+
+ if (tg3_alloc_rx_data(tp, tpr, RXD_OPAQUE_RING_JUMBO, i,
+ &frag_size) < 0) {
netdev_warn(tp->dev,
"Using a smaller RX jumbo ring. Only %d "
"out of %d buffers were allocated "
diff --git a/drivers/net/ethernet/broadcom/tg3.h b/drivers/net/ethernet/broadcom/tg3.h
index 93865f8..7c85545 100644
--- a/drivers/net/ethernet/broadcom/tg3.h
+++ b/drivers/net/ethernet/broadcom/tg3.h
@@ -2815,6 +2815,8 @@ struct tg3_rx_prodring_set {
struct ring_info *rx_jmb_buffers;
dma_addr_t rx_std_mapping;
dma_addr_t rx_jmb_mapping;
+ void *rx_page_addr;
+ unsigned int rx_page_size;
};
#define TG3_IRQ_MAX_VECS_RSS 5
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists