Message-ID: <20231113130041.58124-7-linyunsheng@huawei.com>
Date:   Mon, 13 Nov 2023 21:00:38 +0800
From:   Yunsheng Lin <linyunsheng@...wei.com>
To:     <davem@...emloft.net>, <kuba@...nel.org>, <pabeni@...hat.com>
CC:     <netdev@...r.kernel.org>, <linux-kernel@...r.kernel.org>,
        Yunsheng Lin <linyunsheng@...wei.com>,
        Ayush Sawal <ayush.sawal@...lsio.com>,
        Eric Dumazet <edumazet@...gle.com>,
        Jesper Dangaard Brouer <hawk@...nel.org>,
        Ilias Apalodimas <ilias.apalodimas@...aro.org>,
        Boris Pismenny <borisp@...dia.com>,
        John Fastabend <john.fastabend@...il.com>
Subject: [PATCH RFC 6/8] skbuff: always try to do page pool frag reference counting

As the frag_count handling has been unified in [1], we can do frag
reference counting instead of native page reference counting whenever
possible in the net stack, in order to maximize the chance of
recycling: we assume that some subsystem other than page pool is still
holding on to a page if page_ref_count(page) != 1, in which case we
stop the recycling and release the page from the page pool.
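
To illustrate the recycling condition (a minimal sketch for this
commit message only, not part of the diff below; the helper name
pp_can_recycle() is made up for illustration):

	/* Page pool can only recycle a page when it holds the last
	 * native reference; any extra reference taken by another
	 * subsystem via get_page()/page_ref_inc() forces the page to
	 * be released back to the page allocator instead.
	 */
	static bool pp_can_recycle(struct page *page)
	{
		return page_ref_count(page) == 1;
	}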

For pages coming from devmem, we reuse most of the fields in
'struct page' so that normal memory and devmem can be handled
uniformly, but that metadata is not really tied to mm, so we can't
use page->_refcount as the reference count for devmem; instead,
devmem relies on the page pool's frag reference counting.
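
For example (illustration only, using the helper as changed by this
patch), a caller taking an extra reference on an skb frag now passes
the skb's pp_recycle hint, so a page pool page gets a frag reference
while any other page falls back to a native page reference:

	skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

	/* Bumps pp_frag_count when the page carries the page pool
	 * signature and the skb is marked recyclable; otherwise
	 * falls back to page_ref_inc().
	 */
	__skb_frag_ref(frag, skb->pp_recycle);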

After this patch, pp_frag_count is no longer an appropriate name, as
we no longer ensure that only one user is holding on to a frag; it may
be renamed to frag_refcount in the future to avoid confusion.

1. https://lore.kernel.org/all/20231020095952.11055-1-linyunsheng@huawei.com/

Signed-off-by: Yunsheng Lin <linyunsheng@...wei.com>
---
 .../chelsio/inline_crypto/ch_ktls/chcr_ktls.c       |  2 +-
 drivers/net/ethernet/sun/cassini.c                  |  4 ++--
 drivers/net/veth.c                                  |  2 +-
 include/linux/skbuff.h                              | 12 +++++++++---
 include/net/page_pool/types.h                       | 13 +++++++++++++
 net/core/skbuff.c                                   |  6 +++---
 net/tls/tls_device_fallback.c                       |  2 +-
 7 files changed, 30 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c
index 6482728794dd..87fda3e9d7d1 100644
--- a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c
+++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c
@@ -1658,7 +1658,7 @@ static void chcr_ktls_copy_record_in_skb(struct sk_buff *nskb,
 	for (i = 0; i < record->num_frags; i++) {
 		skb_shinfo(nskb)->frags[i] = record->frags[i];
 		/* increase the frag ref count */
-		__skb_frag_ref(&skb_shinfo(nskb)->frags[i]);
+		skb_frag_ref(nskb, i);
 	}
 
 	skb_shinfo(nskb)->nr_frags = record->num_frags;
diff --git a/drivers/net/ethernet/sun/cassini.c b/drivers/net/ethernet/sun/cassini.c
index b317b9486455..8b818809c721 100644
--- a/drivers/net/ethernet/sun/cassini.c
+++ b/drivers/net/ethernet/sun/cassini.c
@@ -1999,7 +1999,7 @@ static int cas_rx_process_pkt(struct cas *cp, struct cas_rx_comp *rxc,
 		skb->len      += hlen - swivel;
 
 		skb_frag_fill_page_desc(frag, page->buffer, off, hlen - swivel);
-		__skb_frag_ref(frag);
+		__skb_frag_ref(frag, skb->pp_recycle);
 
 		/* any more data? */
 		if ((words[0] & RX_COMP1_SPLIT_PKT) && ((dlen -= hlen) > 0)) {
@@ -2023,7 +2023,7 @@ static int cas_rx_process_pkt(struct cas *cp, struct cas_rx_comp *rxc,
 			frag++;
 
 			skb_frag_fill_page_desc(frag, page->buffer, 0, hlen);
-			__skb_frag_ref(frag);
+			__skb_frag_ref(frag, skb->pp_recycle);
 			RX_USED_ADD(page, hlen + cp->crc_size);
 		}
 
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index 9980517ed8b0..8ee323a1184b 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -724,7 +724,7 @@ static void veth_xdp_get(struct xdp_buff *xdp)
 		return;
 
 	for (i = 0; i < sinfo->nr_frags; i++)
-		__skb_frag_ref(&sinfo->frags[i]);
+		__skb_frag_ref(&sinfo->frags[i], false);
 }
 
 static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq,
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 1889b0968be0..b77043a2b446 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -37,6 +37,7 @@
 #endif
 #include <net/net_debug.h>
 #include <net/dropreason-core.h>
+#include <net/page_pool/types.h>
 
 /**
  * DOC: skb checksums
@@ -3429,9 +3430,14 @@ static inline struct page *skb_frag_page(const skb_frag_t *frag)
  *
  * Takes an additional reference on the paged fragment @frag.
  */
-static inline void __skb_frag_ref(skb_frag_t *frag)
+static inline void __skb_frag_ref(skb_frag_t *frag, bool recycle)
 {
-	page_ref_inc(skb_frag_page(frag));
+	struct page *page = skb_frag_page(frag);
+
+	if (page_pool_frag_ref(page, recycle))
+		return;
+
+	page_ref_inc(page);
 }
 
 /**
@@ -3443,7 +3449,7 @@ static inline void __skb_frag_ref(skb_frag_t *frag)
  */
 static inline void skb_frag_ref(struct sk_buff *skb, int f)
 {
-	__skb_frag_ref(&skb_shinfo(skb)->frags[f]);
+	__skb_frag_ref(&skb_shinfo(skb)->frags[f], skb->pp_recycle);
 }
 
 bool napi_pp_put_page(struct page *page, bool napi_safe);
diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h
index 52e4cf98ebc6..ea4e654168bd 100644
--- a/include/net/page_pool/types.h
+++ b/include/net/page_pool/types.h
@@ -275,6 +275,19 @@ static inline bool is_page_pool_compiled_in(void)
 #endif
 }
 
+static inline bool page_pool_frag_ref(struct page *page, bool recycle)
+{
+	if (recycle && (page->pp_magic & ~0x3UL) == PP_SIGNATURE) {
+		atomic_long_inc(&page->pp_frag_count);
+		return true;
+	}
+
+	BUG_ON((page->pp_magic & ~0x3UL) == PP_SIGNATURE &&
+	       page->pp->mp_ops);
+
+	return false;
+}
+
 /* Caller must provide appropriate safe context, e.g. NAPI. */
 void page_pool_update_nid(struct page_pool *pool, int new_nid);
 
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index ada3da4fe221..a33df6e49694 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4046,7 +4046,7 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
 			to++;
 
 		} else {
-			__skb_frag_ref(fragfrom);
+			__skb_frag_ref(fragfrom, skb->pp_recycle);
 			skb_frag_page_copy(fragto, fragfrom);
 			skb_frag_off_copy(fragto, fragfrom);
 			skb_frag_size_set(fragto, todo);
@@ -4695,7 +4695,7 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
 			}
 
 			*nskb_frag = (i < 0) ? skb_head_frag_to_page_desc(frag_skb) : *frag;
-			__skb_frag_ref(nskb_frag);
+			__skb_frag_ref(nskb_frag, nskb->pp_recycle);
 			size = skb_frag_size(nskb_frag);
 
 			if (pos < offset) {
@@ -5830,7 +5830,7 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
 	 * since we set nr_frags to 0.
 	 */
 	for (i = 0; i < from_shinfo->nr_frags; i++)
-		__skb_frag_ref(&from_shinfo->frags[i]);
+		__skb_frag_ref(&from_shinfo->frags[i], from->pp_recycle);
 
 	to->truesize += delta;
 	to->len += len;
diff --git a/net/tls/tls_device_fallback.c b/net/tls/tls_device_fallback.c
index 4e7228f275fa..d4000b4a1f7d 100644
--- a/net/tls/tls_device_fallback.c
+++ b/net/tls/tls_device_fallback.c
@@ -277,7 +277,7 @@ static int fill_sg_in(struct scatterlist *sg_in,
 	for (i = 0; remaining > 0; i++) {
 		skb_frag_t *frag = &record->frags[i];
 
-		__skb_frag_ref(frag);
+		__skb_frag_ref(frag, false);
 		sg_set_page(sg_in + i, skb_frag_page(frag),
 			    skb_frag_size(frag), skb_frag_off(frag));
 
-- 
2.33.0
