Message-Id: <20250804231704.12309-1-mmc@linux.ibm.com>
Date: Mon, 4 Aug 2025 16:17:04 -0700
From: Mingming Cao <mmc@...ux.ibm.com>
To: netdev@...r.kernel.org
Cc: horms@...nel.org, bjking1@...ux.ibm.com, haren@...ux.ibm.com,
ricklind@...ux.ibm.com, mmc@...ux.ibm.com, kuba@...nel.org,
edumazet@...gle.com, pabeni@...hat.com, linuxppc-dev@...ts.ozlabs.org,
maddy@...ux.ibm.com, mpe@...erman.id.au
Subject: [PATCH v3 net-next] ibmvnic: Increase max subcrq indirect entries with fallback
POWER8 supports a maximum of 16 subcrq indirect descriptor entries per
H_SEND_SUB_CRQ_INDIRECT call, while POWER9 and newer hypervisors
support up to 128 entries. Increasing the maximum number of indirect
descriptor entries improves batching efficiency and reduces hcall
overhead, which improves throughput under large workloads on POWER9
and newer.
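For context, the flush path issues one hcall per batch of descriptors.
A simplified sketch of the send_subcrq_indirect() helper (condensed
from ibmvnic.c; tracing and error logging omitted):
  static int send_subcrq_indirect(struct ibmvnic_adapter *adapter,
                                  u64 remote_handle, u64 ioba,
                                  u64 num_entries)
  {
          unsigned int ua = adapter->vdev->unit_address;
          int rc;

          /* make the descriptor array visible to the hypervisor */
          dma_wmb();
          /* one hcall submits the whole batch of descriptors */
          rc = plpar_hcall_norets(H_SEND_SUB_CRQ_INDIRECT, ua,
                                  remote_handle, ioba, num_entries);
          return rc;
  }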
Currently, the ibmvnic driver always uses a fixed maximum of 16
indirect descriptor entries, and send_subcrq_indirect() errors are all
treated the same (condensed in the sketch below):
- Clean up and drop the entire batch of descriptors.
- Return an error to the caller.
- Rely on TCP/IP retransmissions to recover.
- If the hypervisor returned H_PARAMETER (e.g., because 128
entries are not supported on POWER8), the driver would keep
dropping batches, resulting in unnecessary packet loss.
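Roughly, condensed from the tx flush path in ibmvnic.c (a simplified
sketch, not the exact code):
  rc = send_subcrq_indirect(adapter, handle, dma_addr, entries);
  if (rc) {
          /* any failure, including H_PARAMETER, drops the whole
           * batch and leaves recovery to TCP/IP retransmissions
           */
          ibmvnic_tx_scrq_clean_buffer(adapter, tx_scrq);
          return rc;
  }
  ind_bufp->index = 0;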
In this patch:
Raise the default maximum indirect entries to 128 to improve ibmvnic
batching on modern platforms, and gracefully fall back to 16 entries
on POWER8 systems.
Since there is no VIO interface to query the hypervisor's supported
limit, the driver handles H_PARAMETER errors from
send_subcrq_indirect() as follows (see the sketch after this list):
- On the first H_PARAMETER failure, log the failure context.
- Reduce cur_max_ind_descs to 16 and let that single batch drop.
- Subsequent calls automatically use the correct lower limit,
avoiding repeated drops.
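In condensed form, the pattern this patch applies at both error sites
(rx replenish and tx flush) is:
  if (lpar_rc == H_PARAMETER &&
      adapter->cur_max_ind_descs > IBMVNIC_MAX_IND_DESC_MIN) {
          /* first failure on an older hypervisor: log once and
           * clamp to the POWER8-safe limit of 16 entries
           */
          netdev_info(adapter->netdev,
                      "H_PARAMETER, set ind desc to safe limit %u\n",
                      IBMVNIC_MAX_IND_DESC_MIN);
          adapter->cur_max_ind_descs = IBMVNIC_MAX_IND_DESC_MIN;
  }
  /* this batch is still dropped; later batches use the new limit */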
The goal is to optimize performance on modern systems while gracefully
falling back on older POWER8 hypervisors.
Performance testing shows a 40% throughput improvement with MTU 1500
under large workloads.
--------------------------------------
Changes since v2:
Link to v2: https://www.spinics.net/lists/netdev/msg1104669.html
-- This was patch 4 of a v2 patch series. v2 introduced a module
parameter for backward compatibility. Based on review feedback, this
patch handles the fallback for older systems without adding a module
parameter.
Signed-off-by: Mingming Cao <mmc@...ux.ibm.com>
Reviewed-by: Brian King <bjking1@...ux.ibm.com>
Reviewed-by: Haren Myneni <haren@...ux.ibm.com>
---
drivers/net/ethernet/ibm/ibmvnic.c | 56 ++++++++++++++++++++++++++----
drivers/net/ethernet/ibm/ibmvnic.h | 6 ++--
2 files changed, 53 insertions(+), 9 deletions(-)
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index eec971567aac..28d64e4b8071 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -843,7 +843,7 @@ static void replenish_rx_pool(struct ibmvnic_adapter *adapter,
sub_crq->rx_add.len = cpu_to_be32(pool->buff_size << shift);
/* if send_subcrq_indirect queue is full, flush to VIOS */
- if (ind_bufp->index == IBMVNIC_MAX_IND_DESCS ||
+ if (ind_bufp->index == adapter->cur_max_ind_descs ||
i == count - 1) {
lpar_rc =
send_subcrq_indirect(adapter, handle,
@@ -862,6 +862,19 @@ static void replenish_rx_pool(struct ibmvnic_adapter *adapter,
failure:
if (lpar_rc != H_PARAMETER && lpar_rc != H_CLOSED)
dev_err_ratelimited(dev, "rx: replenish packet buffer failed\n");
+
+ /* Detect the platform's indirect descriptor limit via H_PARAMETER */
+ if (lpar_rc == H_PARAMETER &&
+ adapter->cur_max_ind_descs > IBMVNIC_MAX_IND_DESC_MIN) {
+ netdev_info(adapter->netdev,
+ "H_PARAMETER, set ind desc to safe limit %u\n",
+ IBMVNIC_MAX_IND_DESC_MIN);
+ adapter->cur_max_ind_descs = IBMVNIC_MAX_IND_DESC_MIN;
+ }
+
+ /* For all error cases, drop only this batch.
+ * Rely on TCP/IP retransmissions to retry and recover.
+ */
for (i = ind_bufp->index - 1; i >= 0; --i) {
struct ibmvnic_rx_buff *rx_buff;
@@ -2381,16 +2394,33 @@ static int ibmvnic_tx_scrq_flush(struct ibmvnic_adapter *adapter,
rc = send_subcrq_direct(adapter, handle,
(u64 *)ind_bufp->indir_arr);
- if (rc)
+ if (rc) {
+ dev_err_ratelimited(&adapter->vdev->dev,
+ "tx_flush failed, rc=%u (%llu entries dma=%pad handle=%llx)\n",
+ rc, entries, &dma_addr, handle);
+ /* Detect the platform's indirect descriptor limit via H_PARAMETER */
+ if (rc == H_PARAMETER &&
+ adapter->cur_max_ind_descs > IBMVNIC_MAX_IND_DESC_MIN) {
+ netdev_info(adapter->netdev,
+ "H_PARAMETER, set ind descs to safe limit %u\n",
+ IBMVNIC_MAX_IND_DESC_MIN);
+ adapter->cur_max_ind_descs = IBMVNIC_MAX_IND_DESC_MIN;
+ }
+
+ /* For all error cases, drop only this batch.
+ * Rely on TCP/IP retransmissions to retry and recover.
+ */
ibmvnic_tx_scrq_clean_buffer(adapter, tx_scrq);
- else
+ } else {
ind_bufp->index = 0;
+ }
return rc;
}
static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
{
struct ibmvnic_adapter *adapter = netdev_priv(netdev);
+ u32 cur_max_ind_descs = adapter->cur_max_ind_descs;
int queue_num = skb_get_queue_mapping(skb);
u8 *hdrs = (u8 *)&adapter->tx_rx_desc_req;
struct device *dev = &adapter->vdev->dev;
@@ -2590,7 +2620,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
tx_crq.v1.n_crq_elem = num_entries;
tx_buff->num_entries = num_entries;
/* flush buffer if current entry can not fit */
- if (num_entries + ind_bufp->index > IBMVNIC_MAX_IND_DESCS) {
+ if (num_entries + ind_bufp->index > cur_max_ind_descs) {
lpar_rc = ibmvnic_tx_scrq_flush(adapter, tx_scrq, true);
if (lpar_rc != H_SUCCESS)
goto tx_flush_err;
@@ -2603,7 +2633,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
ind_bufp->index += num_entries;
if (__netdev_tx_sent_queue(txq, skb->len,
netdev_xmit_more() &&
- ind_bufp->index < IBMVNIC_MAX_IND_DESCS)) {
+ ind_bufp->index < cur_max_ind_descs)) {
lpar_rc = ibmvnic_tx_scrq_flush(adapter, tx_scrq, true);
if (lpar_rc != H_SUCCESS)
goto tx_err;
@@ -4006,7 +4036,7 @@ static void release_sub_crq_queue(struct ibmvnic_adapter *adapter,
}
dma_free_coherent(dev,
- IBMVNIC_IND_ARR_SZ,
+ IBMVNIC_IND_MAX_ARR_SZ,
scrq->ind_buf.indir_arr,
scrq->ind_buf.indir_dma);
@@ -4063,7 +4093,7 @@ static struct ibmvnic_sub_crq_queue *init_sub_crq_queue(struct ibmvnic_adapter
scrq->ind_buf.indir_arr =
dma_alloc_coherent(dev,
- IBMVNIC_IND_ARR_SZ,
+ IBMVNIC_IND_MAX_ARR_SZ,
&scrq->ind_buf.indir_dma,
GFP_KERNEL);
@@ -6369,6 +6399,17 @@ static int ibmvnic_reset_init(struct ibmvnic_adapter *adapter, bool reset)
rc = reset_sub_crq_queues(adapter);
}
} else {
+ if (adapter->reset_reason == VNIC_RESET_MOBILITY) {
+ /* After a migration, reset the max indirect
+ * descriptors per hcall back to the default max
+ * (e.g. P8 -> P10). If the destination platform
+ * does not support the max (e.g. P10 -> P8), the
+ * limit will be reduced to the safe minimum for P8
+ * on the first H_PARAMETER failure later.
+ */
+ adapter->cur_max_ind_descs = IBMVNIC_MAX_IND_DESC_MAX;
+ }
+
rc = init_sub_crqs(adapter);
}
@@ -6520,6 +6561,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
adapter->wait_for_reset = false;
adapter->last_reset_time = jiffies;
+ adapter->cur_max_ind_descs = IBMVNIC_MAX_IND_DESC_MAX;
rc = register_netdev(netdev);
if (rc) {
diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h
index 246ddce753f9..829a16116812 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.h
+++ b/drivers/net/ethernet/ibm/ibmvnic.h
@@ -29,8 +29,9 @@
#define IBMVNIC_BUFFS_PER_POOL 100
#define IBMVNIC_MAX_QUEUES 16
#define IBMVNIC_MAX_QUEUE_SZ 4096
-#define IBMVNIC_MAX_IND_DESCS 16
-#define IBMVNIC_IND_ARR_SZ (IBMVNIC_MAX_IND_DESCS * 32)
+#define IBMVNIC_MAX_IND_DESC_MAX 128
+#define IBMVNIC_MAX_IND_DESC_MIN 16
+#define IBMVNIC_IND_MAX_ARR_SZ (IBMVNIC_MAX_IND_DESC_MAX * 32)
#define IBMVNIC_TSO_BUF_SZ 65536
#define IBMVNIC_TSO_BUFS 64
@@ -930,6 +931,7 @@ struct ibmvnic_adapter {
struct ibmvnic_control_ip_offload_buffer ip_offload_ctrl;
dma_addr_t ip_offload_ctrl_tok;
u32 msg_enable;
+ u32 cur_max_ind_descs;
/* Vital Product Data (VPD) */
struct ibmvnic_vpd *vpd;
--
2.39.3 (Apple Git-146)