[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1350647114-6768-10-git-send-email-jeffrey.t.kirsher@intel.com>
Date: Fri, 19 Oct 2012 04:45:09 -0700
From: Jeff Kirsher <jeffrey.t.kirsher@...el.com>
To: davem@...emloft.net
Cc: Alexander Duyck <alexander.h.duyck@...el.com>,
netdev@...r.kernel.org, gospo@...hat.com, sassmann@...hat.com,
Jeff Kirsher <jeffrey.t.kirsher@...el.com>
Subject: [net-next 09/14] igb: Map entire page and sync half instead of mapping and unmapping half pages
From: Alexander Duyck <alexander.h.duyck@...el.com>
This change makes it so that we map the entire page and just sync half of
it for the device at a time. The advantage to this approach is that we can
avoid the locking on map/unmap seen in many IOMMU implementations.
Signed-off-by: Alexander Duyck <alexander.h.duyck@...el.com>
Tested-by: Aaron Brown <aaron.f.brown@...el.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@...el.com>
---
drivers/net/ethernet/intel/igb/igb.h | 1 +
drivers/net/ethernet/intel/igb/igb_ethtool.c | 26 ++--
drivers/net/ethernet/intel/igb/igb_main.c | 211 +++++++++++++++++++++------
3 files changed, 181 insertions(+), 57 deletions(-)
diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h
index 72ab9ac..1d15bb0 100644
--- a/drivers/net/ethernet/intel/igb/igb.h
+++ b/drivers/net/ethernet/intel/igb/igb.h
@@ -239,6 +239,7 @@ struct igb_ring {
/* everything past this point are written often */
u16 next_to_clean ____cacheline_aligned_in_smp;
u16 next_to_use;
+ u16 next_to_alloc;
union {
/* TX */
diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c
index 0faac42..96c6df6 100644
--- a/drivers/net/ethernet/intel/igb/igb_ethtool.c
+++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c
@@ -1694,7 +1694,7 @@ static int igb_check_lbtest_frame(struct igb_rx_buffer *rx_buffer,
frame_size >>= 1;
- data = kmap(rx_buffer->page) + rx_buffer->page_offset;
+ data = kmap(rx_buffer->page);
if (data[3] != 0xFF ||
data[frame_size + 10] != 0xBE ||
@@ -1713,9 +1713,7 @@ static int igb_clean_test_rings(struct igb_ring *rx_ring,
union e1000_adv_rx_desc *rx_desc;
struct igb_rx_buffer *rx_buffer_info;
struct igb_tx_buffer *tx_buffer_info;
- struct netdev_queue *txq;
u16 rx_ntc, tx_ntc, count = 0;
- unsigned int total_bytes = 0, total_packets = 0;
/* initialize next to clean and descriptor values */
rx_ntc = rx_ring->next_to_clean;
@@ -1726,21 +1724,24 @@ static int igb_clean_test_rings(struct igb_ring *rx_ring,
/* check rx buffer */
rx_buffer_info = &rx_ring->rx_buffer_info[rx_ntc];
- /* unmap rx buffer, will be remapped by alloc_rx_buffers */
- dma_unmap_single(rx_ring->dev,
- rx_buffer_info->dma,
- PAGE_SIZE / 2,
- DMA_FROM_DEVICE);
- rx_buffer_info->dma = 0;
+ /* sync Rx buffer for CPU read */
+ dma_sync_single_for_cpu(rx_ring->dev,
+ rx_buffer_info->dma,
+ PAGE_SIZE / 2,
+ DMA_FROM_DEVICE);
/* verify contents of skb */
if (igb_check_lbtest_frame(rx_buffer_info, size))
count++;
+ /* sync Rx buffer for device write */
+ dma_sync_single_for_device(rx_ring->dev,
+ rx_buffer_info->dma,
+ PAGE_SIZE / 2,
+ DMA_FROM_DEVICE);
+
/* unmap buffer on tx side */
tx_buffer_info = &tx_ring->tx_buffer_info[tx_ntc];
- total_bytes += tx_buffer_info->bytecount;
- total_packets += tx_buffer_info->gso_segs;
igb_unmap_and_free_tx_resource(tx_ring, tx_buffer_info);
/* increment rx/tx next to clean counters */
@@ -1755,8 +1756,7 @@ static int igb_clean_test_rings(struct igb_ring *rx_ring,
rx_desc = IGB_RX_DESC(rx_ring, rx_ntc);
}
- txq = netdev_get_tx_queue(tx_ring->netdev, tx_ring->queue_index);
- netdev_tx_completed_queue(txq, total_packets, total_bytes);
+ netdev_tx_reset_queue(txring_txq(tx_ring));
/* re-map buffers to ring, store next to clean values */
igb_alloc_rx_buffers(rx_ring, count);
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 9e38f14..18ad18f 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -2785,6 +2785,7 @@ int igb_setup_rx_resources(struct igb_ring *rx_ring)
if (!rx_ring->desc)
goto err;
+ rx_ring->next_to_alloc = 0;
rx_ring->next_to_clean = 0;
rx_ring->next_to_use = 0;
@@ -3312,16 +3313,16 @@ static void igb_clean_rx_ring(struct igb_ring *rx_ring)
for (i = 0; i < rx_ring->count; i++) {
struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
- if (buffer_info->dma)
- dma_unmap_page(rx_ring->dev,
- buffer_info->dma,
- PAGE_SIZE / 2,
- DMA_FROM_DEVICE);
- buffer_info->dma = 0;
- if (buffer_info->page)
- __free_page(buffer_info->page);
+ if (!buffer_info->page)
+ continue;
+
+ dma_unmap_page(rx_ring->dev,
+ buffer_info->dma,
+ PAGE_SIZE,
+ DMA_FROM_DEVICE);
+ __free_page(buffer_info->page);
+
buffer_info->page = NULL;
- buffer_info->page_offset = 0;
}
size = sizeof(struct igb_rx_buffer) * rx_ring->count;
@@ -3330,6 +3331,7 @@ static void igb_clean_rx_ring(struct igb_ring *rx_ring)
/* Zero out the descriptor ring */
memset(rx_ring->desc, 0, rx_ring->size);
+ rx_ring->next_to_alloc = 0;
rx_ring->next_to_clean = 0;
rx_ring->next_to_use = 0;
}
@@ -5828,6 +5830,104 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
return !!budget;
}
+/**
+ * igb_reuse_rx_page - page flip buffer and store it back on the ring
+ * @rx_ring: rx descriptor ring to store buffers on
+ * @old_buff: donor buffer to have page reused
+ *
+ * Synchronizes page for reuse by the adapter
+ **/
+static void igb_reuse_rx_page(struct igb_ring *rx_ring,
+ struct igb_rx_buffer *old_buff)
+{
+ struct igb_rx_buffer *new_buff;
+ u16 nta = rx_ring->next_to_alloc;
+
+ new_buff = &rx_ring->rx_buffer_info[nta];
+
+ /* update, and store next to alloc */
+ nta++;
+ rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
+
+ /* transfer page from old buffer to new buffer */
+ memcpy(new_buff, old_buff, sizeof(struct igb_rx_buffer));
+
+ /* sync the buffer for use by the device */
+ dma_sync_single_range_for_device(rx_ring->dev, old_buff->dma,
+ old_buff->page_offset,
+ PAGE_SIZE / 2,
+ DMA_FROM_DEVICE);
+}
+
+/**
+ * igb_add_rx_frag - Add contents of Rx buffer to sk_buff
+ * @rx_ring: rx descriptor ring to transact packets on
+ * @rx_buffer: buffer containing page to add
+ * @rx_desc: descriptor containing length of buffer written by hardware
+ * @skb: sk_buff to place the data into
+ *
+ * This function will add the data contained in rx_buffer->page to the skb.
+ * This is done either through a direct copy if the data in the buffer is
+ * less than the skb header size, otherwise it will just attach the page as
+ * a frag to the skb.
+ *
+ * The function will then update the page offset if necessary and return
+ * true if the buffer can be reused by the adapter.
+ **/
+static bool igb_add_rx_frag(struct igb_ring *rx_ring,
+ struct igb_rx_buffer *rx_buffer,
+ union e1000_adv_rx_desc *rx_desc,
+ struct sk_buff *skb)
+{
+ struct page *page = rx_buffer->page;
+ unsigned int size = le16_to_cpu(rx_desc->wb.upper.length);
+
+ if ((size <= IGB_RX_HDR_LEN) && !skb_is_nonlinear(skb)) {
+ unsigned char *va = page_address(page) + rx_buffer->page_offset;
+
+#ifdef CONFIG_IGB_PTP
+ if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) {
+ igb_ptp_rx_pktstamp(rx_ring->q_vector, va, skb);
+ va += IGB_TS_HDR_LEN;
+ size -= IGB_TS_HDR_LEN;
+ }
+
+#endif
+ memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long)));
+
+ /* we can reuse buffer as-is, just make sure it is local */
+ if (likely(page_to_nid(page) == numa_node_id()))
+ return true;
+
+ /* this page cannot be reused so discard it */
+ put_page(page);
+ return false;
+ }
+
+ skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
+ rx_buffer->page_offset, size, PAGE_SIZE / 2);
+
+ /* avoid re-using remote pages */
+ if (unlikely(page_to_nid(page) != numa_node_id()))
+ return false;
+
+ /* if we are only owner of page we can reuse it */
+ if (unlikely(page_count(page) != 1))
+ return false;
+
+ /* flip page offset to other buffer */
+ rx_buffer->page_offset ^= PAGE_SIZE / 2;
+
+ /*
+ * since we are the only owner of the page and we need to
+ * increment it, just set the value to 2 in order to avoid
+ * an unnecessary locked operation
+ */
+ atomic_set(&page->_count, 2);
+
+ return true;
+}
+
static inline void igb_rx_checksum(struct igb_ring *ring,
union e1000_adv_rx_desc *rx_desc,
struct sk_buff *skb)
@@ -5985,6 +6085,7 @@ static unsigned int igb_get_headlen(unsigned char *data,
/**
* igb_pull_tail - igb specific version of skb_pull_tail
* @rx_ring: rx descriptor ring packet is being transacted on
+ * @rx_desc: pointer to the EOP Rx descriptor
* @skb: pointer to current skb being adjusted
*
* This function is an igb specific version of __pskb_pull_tail. The
@@ -6131,7 +6232,6 @@ static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget)
struct igb_ring *rx_ring = q_vector->rx.ring;
union e1000_adv_rx_desc *rx_desc;
struct sk_buff *skb = rx_ring->skb;
- const int current_node = numa_node_id();
unsigned int total_bytes = 0, total_packets = 0;
u16 cleaned_count = igb_desc_unused(rx_ring);
u16 i = rx_ring->next_to_clean;
@@ -6186,20 +6286,25 @@ static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget)
prefetchw(skb->data);
}
- skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
- buffer_info->page_offset,
- le16_to_cpu(rx_desc->wb.upper.length),
- PAGE_SIZE / 2);
-
- if ((page_count(buffer_info->page) != 1) ||
- (page_to_nid(buffer_info->page) != current_node))
- buffer_info->page = NULL;
- else
- get_page(buffer_info->page);
+ /* we are reusing so sync this buffer for CPU use */
+ dma_sync_single_range_for_cpu(rx_ring->dev,
+ buffer_info->dma,
+ buffer_info->page_offset,
+ PAGE_SIZE / 2,
+ DMA_FROM_DEVICE);
+
+ /* pull page into skb */
+ if (igb_add_rx_frag(rx_ring, buffer_info, rx_desc, skb)) {
+ /* hand second half of page back to the ring */
+ igb_reuse_rx_page(rx_ring, buffer_info);
+ } else {
+ /* we are not reusing the buffer so unmap it */
+ dma_unmap_page(rx_ring->dev, buffer_info->dma,
+ PAGE_SIZE, DMA_FROM_DEVICE);
+ }
- dma_unmap_page(rx_ring->dev, buffer_info->dma,
- PAGE_SIZE / 2, DMA_FROM_DEVICE);
- buffer_info->dma = 0;
+ /* clear contents of buffer_info */
+ buffer_info->page = NULL;
if (!igb_test_staterr(rx_desc, E1000_RXD_STAT_EOP))
goto next_desc;
@@ -6259,32 +6364,36 @@ static bool igb_alloc_mapped_page(struct igb_ring *rx_ring,
struct igb_rx_buffer *bi)
{
struct page *page = bi->page;
- dma_addr_t dma = bi->dma;
- unsigned int page_offset = bi->page_offset ^ (PAGE_SIZE / 2);
+ dma_addr_t dma;
- if (dma)
+ /* since we are recycling buffers we should seldom need to alloc */
+ if (likely(page))
return true;
- if (!page) {
- page = __skb_alloc_page(GFP_ATOMIC | __GFP_COLD, NULL);
- if (unlikely(!page)) {
- rx_ring->rx_stats.alloc_failed++;
- return false;
- }
- bi->page = page;
+ /* alloc new page for storage */
+ page = __skb_alloc_page(GFP_ATOMIC | __GFP_COLD, NULL);
+ if (unlikely(!page)) {
+ rx_ring->rx_stats.alloc_failed++;
+ return false;
}
- dma = dma_map_page(rx_ring->dev, page,
- page_offset, PAGE_SIZE / 2,
- DMA_FROM_DEVICE);
+ /* map page for use */
+ dma = dma_map_page(rx_ring->dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE);
+ /*
+ * if mapping failed free memory back to system since
+ * there isn't much point in holding memory we can't use
+ */
if (dma_mapping_error(rx_ring->dev, dma)) {
+ __free_page(page);
+
rx_ring->rx_stats.alloc_failed++;
return false;
}
bi->dma = dma;
- bi->page_offset = page_offset;
+ bi->page = page;
+ bi->page_offset = 0;
return true;
}
@@ -6299,17 +6408,23 @@ void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count)
struct igb_rx_buffer *bi;
u16 i = rx_ring->next_to_use;
+ /* nothing to do */
+ if (!cleaned_count)
+ return;
+
rx_desc = IGB_RX_DESC(rx_ring, i);
bi = &rx_ring->rx_buffer_info[i];
i -= rx_ring->count;
- while (cleaned_count--) {
+ do {
if (!igb_alloc_mapped_page(rx_ring, bi))
break;
- /* Refresh the desc even if buffer_addrs didn't change
- * because each write-back erases this info. */
- rx_desc->read.pkt_addr = cpu_to_le64(bi->dma);
+ /*
+ * Refresh the desc even if buffer_addrs didn't change
+ * because each write-back erases this info.
+ */
+ rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset);
rx_desc++;
bi++;
@@ -6322,17 +6437,25 @@ void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count)
/* clear the hdr_addr for the next_to_use descriptor */
rx_desc->read.hdr_addr = 0;
- }
+
+ cleaned_count--;
+ } while (cleaned_count);
i += rx_ring->count;
if (rx_ring->next_to_use != i) {
+ /* record the next descriptor to use */
rx_ring->next_to_use = i;
- /* Force memory writes to complete before letting h/w
+ /* update next to alloc since we have filled the ring */
+ rx_ring->next_to_alloc = i;
+
+ /*
+ * Force memory writes to complete before letting h/w
* know there are new descriptors to fetch. (Only
* applicable for weak-ordered memory model archs,
- * such as IA-64). */
+ * such as IA-64).
+ */
wmb();
writel(i, rx_ring->tail);
}
--
1.7.11.7
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists