lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Tue, 27 Sep 2022 13:36:08 -0700
From:   Saeed Mahameed <>
To:     "David S. Miller" <>,
        Jakub Kicinski <>,
        Paolo Abeni <>,
        Eric Dumazet <>
Cc:     Saeed Mahameed <>,,
        Tariq Toukan <>,
        Maxim Mikityanskiy <>
Subject: [net-next 13/16] net/mlx5e: xsk: Fix SKB headroom calculation in validation

From: Maxim Mikityanskiy <>

In a typical scenario, if an XSK socket is opened first, then an XDP
program is attached, mlx5e_validate_xsk_param will be called twice:
first on XSK bind, second on channel restart caused by enabling XDP. The
validation includes a call to mlx5e_rx_is_linear_skb, which checks the
presence of the XDP program.

The above means that mlx5e_rx_is_linear_skb might return true the first
time, but false the second time, as mlx5e_rx_get_linear_sz_skb's return
value will increase, because of a different headroom used with XDP.

As XSK RQs never exist without XDP, it would make sense to trick
mlx5e_rx_get_linear_sz_skb into thinking XDP is enabled at the first
check as well. This way, if MTU is too big, it would be detected on XSK
bind, without giving false hope to the userspace application.

However, it turns out that this check is too restrictive in the first
place. SKBs created on XDP_PASS on XSK RQs don't have any headroom. That
means that big MTUs filtered out on the first and the second checks
might actually work.

So, address this issue in the proper way, but taking into account the
absence of the SKB headroom on XSK RQs, when calculating the buffer

Signed-off-by: Maxim Mikityanskiy <>
Reviewed-by: Tariq Toukan <>
Reviewed-by: Saeed Mahameed <>
Signed-off-by: Saeed Mahameed <>
 .../ethernet/mellanox/mlx5/core/en/params.c   | 23 ++++++++-----------
 .../net/ethernet/mellanox/mlx5/core/en_main.c |  2 +-
 2 files changed, 11 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
index c9a4a507a168..5dd3567d02d8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
@@ -24,24 +24,21 @@ u16 mlx5e_get_linear_rq_headroom(struct mlx5e_params *params,
 	return headroom;
-static u32 mlx5e_rx_get_min_frag_sz(struct mlx5e_params *params,
-				    struct mlx5e_xsk_param *xsk)
+static u32 mlx5e_rx_get_linear_sz_xsk(struct mlx5e_params *params,
+				      struct mlx5e_xsk_param *xsk)
 	u32 hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
-	u16 linear_rq_headroom = mlx5e_get_linear_rq_headroom(params, xsk);
-	return linear_rq_headroom + hw_mtu;
+	return xsk->headroom + hw_mtu;
-static u32 mlx5e_rx_get_linear_sz_xsk(struct mlx5e_params *params,
-				      struct mlx5e_xsk_param *xsk)
+static u32 mlx5e_rx_get_linear_sz_skb(struct mlx5e_params *params, bool xsk)
-	return mlx5e_rx_get_min_frag_sz(params, xsk);
+	/* SKBs built on XDP_PASS on XSK RQs don't have headroom. */
+	u16 headroom = xsk ? 0 : mlx5e_get_linear_rq_headroom(params, NULL);
+	u32 hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
-static u32 mlx5e_rx_get_linear_sz_skb(struct mlx5e_params *params)
-	return MLX5_SKB_FRAG_SZ(mlx5e_rx_get_min_frag_sz(params, NULL));
+	return MLX5_SKB_FRAG_SZ(headroom + hw_mtu);
 static u32 mlx5e_rx_get_linear_stride_sz(struct mlx5e_params *params,
@@ -57,7 +54,7 @@ static u32 mlx5e_rx_get_linear_stride_sz(struct mlx5e_params *params,
 	if (params->xdp_prog)
 		return PAGE_SIZE;
-	return roundup_pow_of_two(mlx5e_rx_get_linear_sz_skb(params));
+	return roundup_pow_of_two(mlx5e_rx_get_linear_sz_skb(params, false));
 u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5e_params *params,
@@ -77,7 +74,7 @@ bool mlx5e_rx_is_linear_skb(struct mlx5e_params *params,
 	/* Both XSK and non-XSK cases allocate an SKB on XDP_PASS. Packet data
 	 * must fit into a CPU page.
-	if (mlx5e_rx_get_linear_sz_skb(params) > PAGE_SIZE)
+	if (mlx5e_rx_get_linear_sz_skb(params, xsk) > PAGE_SIZE)
 		return false;
 	/* XSK frames must be big enough to hold the packet data. */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 84cd86ff64d4..f8d45360a643 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -4009,7 +4009,7 @@ static bool mlx5e_xsk_validate_mtu(struct net_device *netdev,
 			 * 2. Size of SKBs allocated on XDP_PASS <= PAGE_SIZE.
 			max_mtu_frame = MLX5E_HW2SW_MTU(new_params, xsk.chunk_size - hr);
-			max_mtu_page = mlx5e_xdp_max_mtu(new_params, &xsk);
+			max_mtu_page = MLX5E_HW2SW_MTU(new_params, SKB_MAX_HEAD(0));
 			max_mtu = min(max_mtu_frame, max_mtu_page);
 			netdev_err(netdev, "MTU %d is too big for an XSK running on channel %u. Try MTU <= %d\n",

Powered by blists - more mailing lists