[<prev] [next>] [day] [month] [year] [list]
Message-ID: <20251022204325.GA18380@linuxonhyperv3.guj3yctzbm1etfxqx2vob5hsef.xx.internal.cloudapp.net>
Date: Wed, 22 Oct 2025 13:43:25 -0700
From: Aditya Garg <gargaditya@...ux.microsoft.com>
To: kys@...rosoft.com, haiyangz@...rosoft.com, wei.liu@...nel.org,
decui@...rosoft.com, andrew+netdev@...n.ch, davem@...emloft.net,
edumazet@...gle.com, kuba@...nel.org, pabeni@...hat.com,
longli@...rosoft.com, kotaranov@...rosoft.com, horms@...nel.org,
shradhagupta@...ux.microsoft.com, ssengar@...ux.microsoft.com,
ernis@...ux.microsoft.com, dipayanroy@...ux.microsoft.com,
shirazsaleem@...rosoft.com, gargaditya@...ux.microsoft.com,
linux-hyperv@...r.kernel.org, netdev@...r.kernel.org,
linux-kernel@...r.kernel.org, linux-rdma@...r.kernel.org,
gargaditya@...rosoft.com
Subject: [RFC PATCH net-next v2] net: mana: Handle SKB if TX SGEs exceed
hardware limit
The MANA hardware supports a maximum of 30 scatter-gather entries (SGEs)
per TX WQE. Exceeding this limit can cause TX failures.
Add ndo_features_check() callback to validate SKB layout before
transmission. For GSO SKBs that would exceed the hardware SGE limit, clear
NETIF_F_GSO_MASK to enforce software segmentation in the stack.
Add a fallback in mana_start_xmit() to linearize non-GSO SKBs that still
exceed the SGE limit.
Return NETDEV_TX_BUSY only for -ENOSPC from mana_gd_post_work_request(),
send other errors to free_sgl_ptr to free resources and record the tx
drop.
Co-developed-by: Dipayaan Roy <dipayanroy@...ux.microsoft.com>
Signed-off-by: Dipayaan Roy <dipayanroy@...ux.microsoft.com>
Signed-off-by: Aditya Garg <gargaditya@...ux.microsoft.com>
---
drivers/net/ethernet/microsoft/mana/mana_en.c | 48 +++++++++++++++++--
include/net/mana/gdma.h | 6 ++-
include/net/mana/mana.h | 1 +
3 files changed, 49 insertions(+), 6 deletions(-)
diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index 0142fd98392c..8e23a87a779d 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -11,6 +11,7 @@
#include <linux/mm.h>
#include <linux/pci.h>
#include <linux/export.h>
+#include <linux/skbuff.h>
#include <net/checksum.h>
#include <net/ip6_checksum.h>
@@ -289,6 +290,21 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
cq = &apc->tx_qp[txq_idx].tx_cq;
tx_stats = &txq->stats;
+ if (MAX_SKB_FRAGS + 2 > MAX_TX_WQE_SGL_ENTRIES &&
+ skb_shinfo(skb)->nr_frags + 2 > MAX_TX_WQE_SGL_ENTRIES) {
+ /* GSO skb with Hardware SGE limit exceeded is not expected here
+ * as they are handled in mana_features_check() callback
+ */
+ if (skb_is_gso(skb))
+ netdev_warn_once(ndev, "GSO enabled skb exceeds max SGE limit\n");
+ if (skb_linearize(skb)) {
+ netdev_warn_once(ndev, "Failed to linearize skb with nr_frags=%d and is_gso=%d\n",
+ skb_shinfo(skb)->nr_frags,
+ skb_is_gso(skb));
+ goto tx_drop_count;
+ }
+ }
+
pkg.tx_oob.s_oob.vcq_num = cq->gdma_id;
pkg.tx_oob.s_oob.vsq_frame = txq->vsq_frame;
@@ -402,8 +418,6 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
}
}
- WARN_ON_ONCE(pkg.wqe_req.num_sge > MAX_TX_WQE_SGL_ENTRIES);
-
if (pkg.wqe_req.num_sge <= ARRAY_SIZE(pkg.sgl_array)) {
pkg.wqe_req.sgl = pkg.sgl_array;
} else {
@@ -438,9 +452,13 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
if (err) {
(void)skb_dequeue_tail(&txq->pending_skbs);
+ mana_unmap_skb(skb, apc);
netdev_warn(ndev, "Failed to post TX OOB: %d\n", err);
- err = NETDEV_TX_BUSY;
- goto tx_busy;
+ if (err == -ENOSPC) {
+ err = NETDEV_TX_BUSY;
+ goto tx_busy;
+ }
+ goto free_sgl_ptr;
}
err = NETDEV_TX_OK;
@@ -478,6 +496,25 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
return NETDEV_TX_OK;
}
+static netdev_features_t mana_features_check(struct sk_buff *skb,
+ struct net_device *ndev,
+ netdev_features_t features)
+{
+ if (MAX_SKB_FRAGS + 2 > MAX_TX_WQE_SGL_ENTRIES &&
+ skb_shinfo(skb)->nr_frags + 2 > MAX_TX_WQE_SGL_ENTRIES) {
+ /* Exceeds HW SGE limit.
+ * GSO case:
+ * Disable GSO so the stack will software-segment the skb
+ * into smaller skbs that fit the SGE budget.
+ * Non-GSO case:
+ * The xmit path will attempt skb_linearize() as a fallback.
+ */
+ if (skb_is_gso(skb))
+ features &= ~NETIF_F_GSO_MASK;
+ }
+ return features;
+}
+
static void mana_get_stats64(struct net_device *ndev,
struct rtnl_link_stats64 *st)
{
@@ -838,6 +875,7 @@ static const struct net_device_ops mana_devops = {
.ndo_open = mana_open,
.ndo_stop = mana_close,
.ndo_select_queue = mana_select_queue,
+ .ndo_features_check = mana_features_check,
.ndo_start_xmit = mana_start_xmit,
.ndo_validate_addr = eth_validate_addr,
.ndo_get_stats64 = mana_get_stats64,
@@ -1606,7 +1644,7 @@ static int mana_move_wq_tail(struct gdma_queue *wq, u32 num_units)
return 0;
}
-static void mana_unmap_skb(struct sk_buff *skb, struct mana_port_context *apc)
+void mana_unmap_skb(struct sk_buff *skb, struct mana_port_context *apc)
{
struct mana_skb_head *ash = (struct mana_skb_head *)skb->head;
struct gdma_context *gc = apc->ac->gdma_dev->gdma_context;
diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h
index 57df78cfbf82..b35ecc58fbab 100644
--- a/include/net/mana/gdma.h
+++ b/include/net/mana/gdma.h
@@ -591,6 +591,9 @@ enum {
/* Driver can self reset on FPGA Reconfig EQE notification */
#define GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE BIT(17)
+/* Driver supports linearizing the skb when num_sge exceeds hardware limit */
+#define GDMA_DRV_CAP_FLAG_1_SKB_LINEARIZE BIT(20)
+
#define GDMA_DRV_CAP_FLAGS1 \
(GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT | \
GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX | \
@@ -599,7 +602,8 @@ enum {
GDMA_DRV_CAP_FLAG_1_DEV_LIST_HOLES_SUP | \
GDMA_DRV_CAP_FLAG_1_DYNAMIC_IRQ_ALLOC_SUPPORT | \
GDMA_DRV_CAP_FLAG_1_SELF_RESET_ON_EQE | \
- GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE)
+ GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE | \
+ GDMA_DRV_CAP_FLAG_1_SKB_LINEARIZE)
#define GDMA_DRV_CAP_FLAGS2 0
diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
index 0921485565c0..330e1bb088bb 100644
--- a/include/net/mana/mana.h
+++ b/include/net/mana/mana.h
@@ -580,6 +580,7 @@ int mana_set_bw_clamp(struct mana_port_context *apc, u32 speed,
void mana_query_phy_stats(struct mana_port_context *apc);
int mana_pre_alloc_rxbufs(struct mana_port_context *apc, int mtu, int num_queues);
void mana_pre_dealloc_rxbufs(struct mana_port_context *apc);
+void mana_unmap_skb(struct sk_buff *skb, struct mana_port_context *apc);
extern const struct ethtool_ops mana_ethtool_ops;
extern struct dentry *mana_debugfs_root;
--
2.43.0
Powered by blists - more mailing lists