lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date: Sat, 23 Dec 2023 03:55:24 +0100
From: Alexander Lobakin <aleksander.lobakin@...el.com>
To: "David S. Miller" <davem@...emloft.net>,
	Eric Dumazet <edumazet@...gle.com>,
	Jakub Kicinski <kuba@...nel.org>,
	Paolo Abeni <pabeni@...hat.com>
Cc: Alexander Lobakin <aleksander.lobakin@...el.com>,
	Maciej Fijalkowski <maciej.fijalkowski@...el.com>,
	Michal Kubiak <michal.kubiak@...el.com>,
	Larysa Zaremba <larysa.zaremba@...el.com>,
	Alexei Starovoitov <ast@...nel.org>,
	Daniel Borkmann <daniel@...earbox.net>,
	Willem de Bruijn <willemdebruijn.kernel@...il.com>,
	intel-wired-lan@...ts.osuosl.org,
	netdev@...r.kernel.org,
	linux-kernel@...r.kernel.org
Subject: [PATCH RFC net-next 04/34] libie: support different types of buffers for Rx

Unlike previous generations, idpf requires more buffer types for optimal
performance. This includes: header buffers, short buffers, and
no-overhead buffers (w/o headroom and tailroom, for TCP zerocopy when
the header split is enabled).
Introduce libie Rx buffer type and calculate page_pool params
accordingly. All the HW-related details like buffer alignment are still
accounted for. For the header buffers, pick 256 bytes as in most places in
the kernel (have you ever seen frames with bigger headers?).

Signed-off-by: Alexander Lobakin <aleksander.lobakin@...el.com>
---
 drivers/net/ethernet/intel/libie/rx.c | 107 +++++++++++++++++++++++---
 include/linux/net/intel/libie/rx.h    |  19 +++++
 2 files changed, 115 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/intel/libie/rx.c b/drivers/net/ethernet/intel/libie/rx.c
index 610f16043bcf..3d3b19d2b40d 100644
--- a/drivers/net/ethernet/intel/libie/rx.c
+++ b/drivers/net/ethernet/intel/libie/rx.c
@@ -6,14 +6,14 @@
 /* Rx buffer management */
 
 /**
- * libie_rx_hw_len - get the actual buffer size to be passed to HW
+ * libie_rx_hw_len_mtu - get the actual buffer size to be passed to HW
  * @pp: &page_pool_params of the netdev to calculate the size for
  *
  * Return: HW-writeable length per one buffer to pass it to the HW accounting:
  * MTU the @dev has, HW required alignment, minimum and maximum allowed values,
  * and system's page size.
  */
-static u32 libie_rx_hw_len(const struct page_pool_params *pp)
+static u32 libie_rx_hw_len_mtu(const struct page_pool_params *pp)
 {
 	u32 len;
 
@@ -24,6 +24,96 @@ static u32 libie_rx_hw_len(const struct page_pool_params *pp)
 	return len;
 }
 
+/**
+ * libie_rx_hw_len_truesize - get the short buffer size to be passed to HW
+ * @pp: &page_pool_params of the netdev to calculate the size for
+ * @truesize: desired truesize for the buffers
+ *
+ * Return: HW-writeable length per one buffer to pass it to the HW ignoring the
+ * MTU and closest to the passed truesize. Can be used for "short" buffer
+ * queues to fragment pages more efficiently.
+ */
+static u32 libie_rx_hw_len_truesize(const struct page_pool_params *pp,
+				    u32 truesize)
+{
+	u32 min, len;
+
+	min = SKB_HEAD_ALIGN(pp->offset + LIBIE_RX_BUF_LEN_ALIGN);
+	truesize = clamp(roundup_pow_of_two(truesize), roundup_pow_of_two(min),
+			 PAGE_SIZE << LIBIE_RX_PAGE_ORDER);
+
+	len = SKB_WITH_OVERHEAD(truesize - pp->offset);
+	len = ALIGN_DOWN(len, LIBIE_RX_BUF_LEN_ALIGN);
+	len = clamp(len, LIBIE_MIN_RX_BUF_LEN, pp->max_len);
+
+	return len;
+}
+
+static void libie_rx_page_pool_params(struct libie_buf_queue *bq,
+				      struct page_pool_params *pp)
+{
+	pp->offset = LIBIE_SKB_HEADROOM;
+	/* HW-writeable / syncable length per one page */
+	pp->max_len = LIBIE_RX_BUF_LEN(pp->offset);
+
+	/* HW-writeable length per buffer */
+	switch (bq->type) {
+	case LIBIE_RX_BUF_MTU:
+		bq->rx_buf_len = libie_rx_hw_len_mtu(pp);
+		break;
+	case LIBIE_RX_BUF_SHORT:
+		bq->rx_buf_len = libie_rx_hw_len_truesize(pp, bq->truesize);
+		break;
+	case LIBIE_RX_BUF_HDR:
+		bq->rx_buf_len = ALIGN(LIBIE_MAX_HEAD, LIBIE_RX_BUF_LEN_ALIGN);
+		break;
+	default:
+		break;
+	}
+
+	/* Buffer size to allocate */
+	bq->truesize = roundup_pow_of_two(SKB_HEAD_ALIGN(pp->offset +
+							 bq->rx_buf_len));
+}
+
+/**
+ * libie_rx_page_pool_params_zc - calculate params without the stack overhead
+ * @bq: buffer queue to calculate the size for
+ * @pp: &page_pool_params of the netdev
+ *
+ * Adjusts the PP params to exclude the stack overhead and sets both the buffer
+ * length and the truesize, which are equal for the data buffers. Note that this
+ * requires separate header buffers to be always active and account for the
+ * overhead.
+ * With the MTU == ``PAGE_SIZE``, this allows the kernel to enable the zerocopy
+ * mode.
+ */
+static bool libie_rx_page_pool_params_zc(struct libie_buf_queue *bq,
+					 struct page_pool_params *pp)
+{
+	u32 mtu;
+
+	pp->offset = 0;
+	pp->max_len = PAGE_SIZE << LIBIE_RX_PAGE_ORDER;
+
+	switch (bq->type) {
+	case LIBIE_RX_BUF_MTU:
+		mtu = READ_ONCE(pp->netdev->mtu);
+		break;
+	case LIBIE_RX_BUF_SHORT:
+		mtu = bq->truesize;
+		break;
+	default:
+		return false;
+	}
+
+	bq->rx_buf_len = clamp(roundup_pow_of_two(mtu), LIBIE_RX_BUF_LEN_ALIGN,
+			       pp->max_len);
+	bq->truesize = bq->rx_buf_len;
+
+	return true;
+}
+
 /**
  * libie_rx_page_pool_create - create a PP with the default libie settings
  * @bq: buffer queue struct to fill
@@ -43,17 +133,12 @@ int libie_rx_page_pool_create(struct libie_buf_queue *bq,
 		.netdev		= napi->dev,
 		.napi		= napi,
 		.dma_dir	= DMA_FROM_DEVICE,
-		.offset		= LIBIE_SKB_HEADROOM,
 	};
 
-	/* HW-writeable / syncable length per one page */
-	pp.max_len = LIBIE_RX_BUF_LEN(pp.offset);
-
-	/* HW-writeable length per buffer */
-	bq->rx_buf_len = libie_rx_hw_len(&pp);
-	/* Buffer size to allocate */
-	bq->truesize = roundup_pow_of_two(SKB_HEAD_ALIGN(pp.offset +
-							 bq->rx_buf_len));
+	if (!bq->hsplit)
+		libie_rx_page_pool_params(bq, &pp);
+	else if (!libie_rx_page_pool_params_zc(bq, &pp))
+		return -EINVAL;
 
 	bq->pp = page_pool_create(&pp);
 
diff --git a/include/linux/net/intel/libie/rx.h b/include/linux/net/intel/libie/rx.h
index 0d6bce19ad6b..87ad8f9e89c7 100644
--- a/include/linux/net/intel/libie/rx.h
+++ b/include/linux/net/intel/libie/rx.h
@@ -19,6 +19,8 @@
 #define LIBIE_MAX_HEADROOM	LIBIE_SKB_HEADROOM
 /* Link layer / L2 overhead: Ethernet, 2 VLAN tags (C + S), FCS */
 #define LIBIE_RX_LL_LEN		(ETH_HLEN + 2 * VLAN_HLEN + ETH_FCS_LEN)
+/* Maximum supported L2-L4 header length */
+#define LIBIE_MAX_HEAD		256
 
 /* Always use order-0 pages */
 #define LIBIE_RX_PAGE_ORDER	0
@@ -64,6 +66,18 @@ struct libie_rx_buffer {
 	u32			truesize;
 };
 
+/**
+ * enum libie_rx_buf_type - enum representing types of Rx buffers
+ * @LIBIE_RX_BUF_MTU: buffer size is determined by MTU
+ * @LIBIE_RX_BUF_SHORT: buffer size is smaller than MTU, for short frames
+ * @LIBIE_RX_BUF_HDR: buffer size is ``LIBIE_MAX_HEAD``-sized, for headers
+ */
+enum libie_rx_buf_type {
+	LIBIE_RX_BUF_MTU	= 0U,
+	LIBIE_RX_BUF_SHORT,
+	LIBIE_RX_BUF_HDR,
+};
+
 /**
  * struct libie_buf_queue - structure representing a buffer queue
  * @pp: &page_pool for buffer management
@@ -71,6 +85,8 @@ struct libie_rx_buffer {
  * @truesize: size to allocate per buffer, w/overhead
  * @count: number of descriptors/buffers the queue has
  * @rx_buf_len: HW-writeable length per each buffer
+ * @type: type of the buffers this queue has
+ * @hsplit: flag whether header split is enabled
  */
 struct libie_buf_queue {
 	struct page_pool	*pp;
@@ -81,6 +97,9 @@ struct libie_buf_queue {
 
 	/* Cold fields */
 	u32			rx_buf_len;
+	enum libie_rx_buf_type	type:2;
+
+	bool			hsplit:1;
 };
 
 int libie_rx_page_pool_create(struct libie_buf_queue *bq,
-- 
2.43.0


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ