lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Tue, 18 Aug 2020 12:44:12 -0700
From:   David Awogbemila <awogbemila@...gle.com>
To:     netdev@...r.kernel.org
Cc:     Catherine Sullivan <csully@...gle.com>,
        Yangchun Fu <yangchun@...gle.com>,
        David Awogbemila <awogbemila@...gle.com>
Subject: [PATCH net-next 13/18] gve: Add rx buffer pagecnt bias.

From: Catherine Sullivan <csully@...gle.com>

Add a pagecnt bias field to rx buffer info struct to eliminate
needing to increment the atomic page ref count on every pass in the
rx hotpath.

We now keep track of whether the nic has the only reference to a page
by decrementing the bias instead of incrementing the atomic page ref
count, which could be expensive.
If the bias is equal to the pagecount, then the nic has the only
reference to that page. But, if the bias is less than the page count,
the networking stack is still using the page.
The pagecount should never be less than the bias.

Reviewed-by: Yangchun Fu <yangchun@...gle.com>
Signed-off-by: Catherine Sullivan <csully@...gle.com>
Signed-off-by: David Awogbemila <awogbemila@...gle.com>
---
 drivers/net/ethernet/google/gve/gve.h    |  1 +
 drivers/net/ethernet/google/gve/gve_rx.c | 47 ++++++++++++++++++------
 2 files changed, 36 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h
index c0f0b22c1ec0..8b1773c45cb6 100644
--- a/drivers/net/ethernet/google/gve/gve.h
+++ b/drivers/net/ethernet/google/gve/gve.h
@@ -50,6 +50,7 @@ struct gve_rx_slot_page_info {
 	struct page *page;
 	void *page_address;
 	u32 page_offset; /* offset to write to in page */
+	int pagecnt_bias; /* expected pagecnt if only the driver has a ref */
 	bool can_flip; /* page can be flipped and reused */
 };
 
diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c
index ca12f267d08a..c65615b9e602 100644
--- a/drivers/net/ethernet/google/gve/gve_rx.c
+++ b/drivers/net/ethernet/google/gve/gve_rx.c
@@ -23,6 +23,7 @@ static void gve_rx_free_buffer(struct device *dev,
 	dma_addr_t dma = (dma_addr_t)(be64_to_cpu(data_slot->addr) -
 				      page_info->page_offset);
 
+	page_ref_sub(page_info->page, page_info->pagecnt_bias - 1);
 	gve_free_page(dev, page_info->page, dma, DMA_FROM_DEVICE);
 }
 
@@ -70,6 +71,9 @@ static void gve_setup_rx_buffer(struct gve_rx_slot_page_info *page_info,
 	page_info->page_offset = 0;
 	page_info->page_address = page_address(page);
 	slot->addr = cpu_to_be64(addr);
+
+	set_page_count(page, INT_MAX);
+	page_info->pagecnt_bias = INT_MAX;
 }
 
 static int gve_prefill_rx_pages(struct gve_rx_ring *rx)
@@ -347,21 +351,40 @@ static bool gve_rx_can_flip_buffers(struct net_device *netdev)
 #endif
 }
 
-static int gve_rx_can_recycle_buffer(struct page *page)
+static int gve_rx_can_recycle_buffer(struct gve_rx_slot_page_info *page_info)
 {
-	int pagecount = page_count(page);
+	int pagecount = page_count(page_info->page);
 
 	/* This page is not being used by any SKBs - reuse */
-	if (pagecount == 1) {
+	if (pagecount == page_info->pagecnt_bias) {
 		return 1;
 	/* This page is still being used by an SKB - we can't reuse */
-	} else if (pagecount >= 2) {
+	} else if (pagecount > page_info->pagecnt_bias) {
 		return 0;
 	}
-	WARN(pagecount < 1, "Pagecount should never be < 1");
+	WARN(pagecount < page_info->pagecnt_bias, "Pagecount should never be less than the bias.");
 	return -1;
 }
 
+/* Update page reference not by incrementing the page count, but by
+ * decrementing the "bias" offset from page_count that determines
+ * whether the nic has the only reference.
+ */
+static void gve_rx_update_pagecnt_bias(struct gve_rx_slot_page_info *page_info)
+{
+	page_info->pagecnt_bias--;
+	if (page_info->pagecnt_bias == 0) {
+		int pagecount = page_count(page_info->page);
+
+		/* If we have run out of bias - set it back up to INT_MAX
+		 * minus the existing refs.
+		 */
+		page_info->pagecnt_bias = INT_MAX - (pagecount);
+		/* Set pagecount back up to max */
+		set_page_count(page_info->page, INT_MAX);
+	}
+}
+
 static struct sk_buff *
 gve_rx_raw_addressing(struct device *dev, struct net_device *netdev,
 		      struct gve_rx_slot_page_info *page_info, u16 len,
@@ -373,11 +396,11 @@ gve_rx_raw_addressing(struct device *dev, struct net_device *netdev,
 	if (!skb)
 		return NULL;
 
-	/* Optimistically stop the kernel from freeing the page by increasing
-	 * the page bias. We will check the refcount in refill to determine if
-	 * we need to alloc a new page.
+	/* Optimistically stop the kernel from freeing the page.
+	 * We will check again in refill to determine if we need to alloc a
+	 * new page.
 	 */
-	get_page(page_info->page);
+	gve_rx_update_pagecnt_bias(page_info);
 	page_info->can_flip = can_flip;
 
 	return skb;
@@ -400,7 +423,7 @@ gve_rx_qpl(struct device *dev, struct net_device *netdev,
 		/* No point in recycling if we didn't get the skb */
 		if (skb) {
 			/* Make sure the networking stack can't free the page */
-			get_page(page_info->page);
+			gve_rx_update_pagecnt_bias(page_info);
 			gve_rx_flip_buffer(page_info, data_slot);
 		}
 	} else {
@@ -458,7 +481,7 @@ static bool gve_rx(struct gve_rx_ring *rx, struct gve_rx_desc *rx_desc,
 		int recycle = 0;
 
 		if (can_flip) {
-			recycle = gve_rx_can_recycle_buffer(page_info->page);
+			recycle = gve_rx_can_recycle_buffer(page_info);
 			if (recycle < 0) {
 				gve_schedule_reset(priv);
 				return false;
@@ -548,7 +571,7 @@ static bool gve_rx_refill_buffers(struct gve_priv *priv, struct gve_rx_ring *rx)
 			 * owns half the page it is impossible to tell which half. Either
 			 * the whole page is free or it needs to be replaced.
 			 */
-			int recycle = gve_rx_can_recycle_buffer(page_info->page);
+			int recycle = gve_rx_can_recycle_buffer(page_info);
 
 			if (recycle < 0) {
 				gve_schedule_reset(priv);
-- 
2.28.0.220.ged08abb693-goog

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ