Message-Id: <20250721080103.30964-4-jiawenwu@trustnetic.com>
Date: Mon, 21 Jul 2025 16:01:03 +0800
From: Jiawen Wu <jiawenwu@...stnetic.com>
To: netdev@...r.kernel.org,
Andrew Lunn <andrew+netdev@...n.ch>,
"David S. Miller" <davem@...emloft.net>,
Eric Dumazet <edumazet@...gle.com>,
Jakub Kicinski <kuba@...nel.org>,
Paolo Abeni <pabeni@...hat.com>,
Simon Horman <horms@...nel.org>
Cc: Mengyuan Lou <mengyuanlou@...-swift.com>,
Jiawen Wu <jiawenwu@...stnetic.com>
Subject: [PATCH net-next v2 3/3] net: wangxun: support adaptive RX coalescing
Add support for turning adaptive RX coalescing on and off. When adaptive RX
coalescing is enabled, update the dynamic ITR value based on ring statistics.
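As an illustration (the interface name eth0 below is hypothetical), the
feature can be toggled and inspected from userspace with ethtool:

  # ethtool -C eth0 adaptive-rx on
  # ethtool -C eth0 adaptive-rx off
  # ethtool -c eth0    (the "Adaptive RX:" field reports the current setting)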
Signed-off-by: Jiawen Wu <jiawenwu@...stnetic.com>
---
.../net/ethernet/wangxun/libwx/wx_ethtool.c | 10 +-
drivers/net/ethernet/wangxun/libwx/wx_lib.c | 208 ++++++++++++++++++
drivers/net/ethernet/wangxun/libwx/wx_type.h | 9 +
.../net/ethernet/wangxun/libwx/wx_vf_lib.c | 2 +-
.../net/ethernet/wangxun/libwx/wx_vf_lib.h | 1 +
.../net/ethernet/wangxun/ngbe/ngbe_ethtool.c | 3 +-
.../ethernet/wangxun/txgbe/txgbe_ethtool.c | 3 +-
7 files changed, 232 insertions(+), 4 deletions(-)
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_ethtool.c b/drivers/net/ethernet/wangxun/libwx/wx_ethtool.c
index ebef99185bca..f2d888825659 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_ethtool.c
+++ b/drivers/net/ethernet/wangxun/libwx/wx_ethtool.c
@@ -303,6 +303,9 @@ int wx_get_coalesce(struct net_device *netdev,
else
ec->rx_coalesce_usecs = wx->rx_itr_setting >> 2;
+ if (wx->rx_itr_setting == 1)
+ ec->use_adaptive_rx_coalesce = 1;
+
/* if in mixed tx/rx queues per vector mode, report only rx settings */
if (wx->q_vector[0]->tx.count && wx->q_vector[0]->rx.count)
return 0;
@@ -363,10 +366,15 @@ int wx_set_coalesce(struct net_device *netdev,
(ec->tx_coalesce_usecs > (max_eitr >> 2)))
return -EINVAL;
+ if (ec->use_adaptive_rx_coalesce) {
+ wx->rx_itr_setting = 1;
+ return 0;
+ }
+
if (ec->rx_coalesce_usecs > 1)
wx->rx_itr_setting = ec->rx_coalesce_usecs << 2;
else
- wx->rx_itr_setting = ec->rx_coalesce_usecs;
+ wx->rx_itr_setting = rx_itr_param;
if (wx->rx_itr_setting != 1)
rx_itr_param = wx->rx_itr_setting;
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_lib.c b/drivers/net/ethernet/wangxun/libwx/wx_lib.c
index 723785ef87bb..ebc4281a8760 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_lib.c
+++ b/drivers/net/ethernet/wangxun/libwx/wx_lib.c
@@ -16,6 +16,7 @@
#include "wx_lib.h"
#include "wx_ptp.h"
#include "wx_hw.h"
+#include "wx_vf_lib.h"
/* Lookup table mapping the HW PTYPE to the bit field for decoding */
static struct wx_dec_ptype wx_ptype_lookup[256] = {
@@ -832,6 +833,211 @@ static bool wx_clean_tx_irq(struct wx_q_vector *q_vector,
return !!budget;
}
+static void wx_update_itr(struct wx_q_vector *q_vector,
+ struct wx_ring_container *ring_container)
+{
+ unsigned int itr = WX_ITR_ADAPTIVE_MIN_USECS |
+ WX_ITR_ADAPTIVE_LATENCY;
+ unsigned int avg_wire_size, packets, bytes;
+ unsigned long next_update = jiffies;
+
+ /* If we don't have any rings just leave ourselves set for maximum
+ * possible latency so we take ourselves out of the equation.
+ */
+ if (!ring_container->ring)
+ return;
+
+ /* If we didn't update within up to 1 - 2 jiffies we can assume
+ * that either packets are coming in so slow there hasn't been
+ * any work, or that there is so much work that NAPI is dealing
+ * with interrupt moderation and we don't need to do anything.
+ */
+ if (time_after(next_update, ring_container->next_update))
+ goto clear_counts;
+
+ packets = ring_container->total_packets;
+
+ /* We have no packets to actually measure against. This means
+ * either one of the other queues on this vector is active or
+ * we are a Tx queue doing TSO with too high of an interrupt rate.
+ *
+ * When this occurs just tick up our delay by the minimum value
+ * and hope that this extra delay will prevent us from being called
+ * without any work on our queue.
+ */
+ if (!packets) {
+ itr = (q_vector->itr >> 2) + WX_ITR_ADAPTIVE_MIN_INC;
+ if (itr > WX_ITR_ADAPTIVE_MAX_USECS)
+ itr = WX_ITR_ADAPTIVE_MAX_USECS;
+ itr += ring_container->itr & WX_ITR_ADAPTIVE_LATENCY;
+ goto clear_counts;
+ }
+
+ bytes = ring_container->total_bytes;
+
+ /* If packets are less than 4 or bytes are less than 9000 assume
+ * insufficient data to use bulk rate limiting approach. We are
+ * likely latency driven.
+ */
+ if (packets < 4 && bytes < 9000) {
+ itr = WX_ITR_ADAPTIVE_LATENCY;
+ goto adjust_by_size;
+ }
+
+ /* Between 4 and 48 we can assume that our current interrupt delay
+ * is only slightly too low. As such we should increase it by a small
+ * fixed amount.
+ */
+ if (packets < 48) {
+ itr = (q_vector->itr >> 2) + WX_ITR_ADAPTIVE_MIN_INC;
+ if (itr > WX_ITR_ADAPTIVE_MAX_USECS)
+ itr = WX_ITR_ADAPTIVE_MAX_USECS;
+ goto clear_counts;
+ }
+
+ /* Between 48 and 96 is our "goldilocks" zone where we are working
+ * out "just right". Just report that our current ITR is good for us.
+ */
+ if (packets < 96) {
+ itr = q_vector->itr >> 2;
+ goto clear_counts;
+ }
+
+ /* If packet count is 96 or greater we are likely looking at a slight
+ * overrun of the delay we want. Try halving our delay to see if that
+ * will cut the number of packets in half per interrupt.
+ */
+ if (packets < 256) {
+ itr = q_vector->itr >> 3;
+ if (itr < WX_ITR_ADAPTIVE_MIN_USECS)
+ itr = WX_ITR_ADAPTIVE_MIN_USECS;
+ goto clear_counts;
+ }
+
+ /* The paths below assume we are dealing with a bulk ITR since number
+ * of packets is 256 or greater. We are just going to have to compute
+ * a value and try to bring the count under control, though for smaller
+ * packet sizes there isn't much we can do as NAPI polling will likely
+ * be kicking in sooner rather than later.
+ */
+ itr = WX_ITR_ADAPTIVE_BULK;
+
+adjust_by_size:
+ /* If packet counts are 256 or greater we can assume we have a gross
+ * overestimation of what the rate should be. Instead of trying to fine
+ * tune it just use the formula below to try and dial in an exact value
+ * given the current packet size of the frame.
+ */
+ avg_wire_size = bytes / packets;
+
+ /* The following is a crude approximation of:
+ * wmem_default / (size + overhead) = desired_pkts_per_int
+ * rate / bits_per_byte / (size + ethernet overhead) = pkt_rate
+ * (desired_pkt_rate / pkt_rate) * usecs_per_sec = ITR value
+ *
+ * Assuming wmem_default is 212992 and overhead is 640 bytes per
+ * packet, (256 skb, 64 headroom, 320 shared info), we can reduce the
+ * formula down to
+ *
+ * (170 * (size + 24)) / (size + 640) = ITR
+ *
+ * We first do some math on the packet size and then finally bitshift
+ * by 8 after rounding up. We also have to account for PCIe link speed
+ * difference as ITR scales based on this.
+ */
+ if (avg_wire_size <= 60) {
+ /* Start at 50k ints/sec */
+ avg_wire_size = 5120;
+ } else if (avg_wire_size <= 316) {
+ /* 50K ints/sec to 16K ints/sec */
+ avg_wire_size *= 40;
+ avg_wire_size += 2720;
+ } else if (avg_wire_size <= 1084) {
+ /* 16K ints/sec to 9.2K ints/sec */
+ avg_wire_size *= 15;
+ avg_wire_size += 11452;
+ } else if (avg_wire_size <= 1968) {
+ /* 9.2K ints/sec to 8K ints/sec */
+ avg_wire_size *= 5;
+ avg_wire_size += 22420;
+ } else {
+ /* plateau at a limit of 8K ints/sec */
+ avg_wire_size = 32256;
+ }
+
+ /* If we are in low latency mode, halve our delay, which doubles the rate
+ * to somewhere between 100K and 16K ints/sec
+ */
+ if (itr & WX_ITR_ADAPTIVE_LATENCY)
+ avg_wire_size >>= 1;
+
+ /* Resultant value is 256 times larger than it needs to be. This
+ * gives us room to adjust the value as needed to either increase
+ * or decrease the value based on link speeds of 25G, 10G, 1G, etc.
+ *
+ * Use addition as we have already recorded the new latency flag
+ * for the ITR value.
+ */
+ switch (q_vector->wx->speed) {
+ case SPEED_25000:
+ itr += DIV_ROUND_UP(avg_wire_size,
+ WX_ITR_ADAPTIVE_MIN_INC * 512) *
+ WX_ITR_ADAPTIVE_MIN_INC;
+ break;
+ case SPEED_10000:
+ case SPEED_100:
+ default:
+ itr += DIV_ROUND_UP(avg_wire_size,
+ WX_ITR_ADAPTIVE_MIN_INC * 256) *
+ WX_ITR_ADAPTIVE_MIN_INC;
+ break;
+ case SPEED_1000:
+ case SPEED_10:
+ if (avg_wire_size > 8064)
+ avg_wire_size = 8064;
+ itr += DIV_ROUND_UP(avg_wire_size,
+ WX_ITR_ADAPTIVE_MIN_INC * 64) *
+ WX_ITR_ADAPTIVE_MIN_INC;
+ break;
+ }
+
+clear_counts:
+ /* write back value */
+ ring_container->itr = itr;
+
+ /* next update should occur within next jiffy */
+ ring_container->next_update = next_update + 1;
+
+ ring_container->total_bytes = 0;
+ ring_container->total_packets = 0;
+}
+
+static void wx_set_itr(struct wx_q_vector *q_vector)
+{
+ struct wx *wx = q_vector->wx;
+ u32 new_itr;
+
+ wx_update_itr(q_vector, &q_vector->tx);
+ wx_update_itr(q_vector, &q_vector->rx);
+
+ /* use the smallest value of new ITR delay calculations */
+ new_itr = min(q_vector->rx.itr, q_vector->tx.itr);
+
+ /* Clear latency flag if set, shift into correct position */
+ new_itr &= ~WX_ITR_ADAPTIVE_LATENCY;
+ new_itr <<= 2;
+
+ if (new_itr != q_vector->itr) {
+ /* save the algorithm value here */
+ q_vector->itr = new_itr;
+
+ if (wx->pdev->is_virtfn)
+ wx_write_eitr_vf(q_vector);
+ else
+ wx_write_eitr(q_vector);
+ }
+}
+
/**
* wx_poll - NAPI polling RX/TX cleanup routine
* @napi: napi struct with our devices info in it
@@ -878,6 +1084,8 @@ static int wx_poll(struct napi_struct *napi, int budget)
/* all work done, exit the polling mode */
if (likely(napi_complete_done(napi, work_done))) {
+ if (wx->rx_itr_setting == 1)
+ wx_set_itr(q_vector);
if (netif_running(wx->netdev))
wx_intr_enable(wx, WX_INTR_Q(q_vector->v_idx));
}
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_type.h b/drivers/net/ethernet/wangxun/libwx/wx_type.h
index 5c52a1db4024..3530e0ef32c5 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_type.h
+++ b/drivers/net/ethernet/wangxun/libwx/wx_type.h
@@ -416,6 +416,14 @@ enum WX_MSCA_CMD_value {
#define WX_AML_MAX_EITR 0x00000FFFU
#define WX_EM_MAX_EITR 0x00007FFCU
+#define WX_ITR_ADAPTIVE_MIN_INC 2
+#define WX_ITR_ADAPTIVE_MIN_USECS 10
+#define WX_ITR_ADAPTIVE_MAX_USECS 84
+#define WX_ITR_ADAPTIVE_LATENCY 0x80
+#define WX_ITR_ADAPTIVE_BULK 0x00
+#define WX_ITR_ADAPTIVE_MASK_USECS (WX_ITR_ADAPTIVE_LATENCY - \
+ WX_ITR_ADAPTIVE_MIN_INC)
+
/* transmit DMA Registers */
#define WX_PX_TR_BAL(_i) (0x03000 + ((_i) * 0x40))
#define WX_PX_TR_BAH(_i) (0x03004 + ((_i) * 0x40))
@@ -1030,6 +1038,7 @@ struct wx_rx_queue_stats {
struct wx_ring_container {
struct wx_ring *ring; /* pointer to linked list of rings */
+ unsigned long next_update; /* jiffies value of last update */
unsigned int total_bytes; /* total bytes processed this int */
unsigned int total_packets; /* total packets processed this int */
u8 count; /* total number of rings in vector */
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_vf_lib.c b/drivers/net/ethernet/wangxun/libwx/wx_vf_lib.c
index 5d48df7a849f..7bcf7e90883b 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_vf_lib.c
+++ b/drivers/net/ethernet/wangxun/libwx/wx_vf_lib.c
@@ -10,7 +10,7 @@
#include "wx_vf.h"
#include "wx_vf_lib.h"
-static void wx_write_eitr_vf(struct wx_q_vector *q_vector)
+void wx_write_eitr_vf(struct wx_q_vector *q_vector)
{
struct wx *wx = q_vector->wx;
int v_idx = q_vector->v_idx;
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_vf_lib.h b/drivers/net/ethernet/wangxun/libwx/wx_vf_lib.h
index 43ea126b79eb..a4bd23c92800 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_vf_lib.h
+++ b/drivers/net/ethernet/wangxun/libwx/wx_vf_lib.h
@@ -4,6 +4,7 @@
#ifndef _WX_VF_LIB_H_
#define _WX_VF_LIB_H_
+void wx_write_eitr_vf(struct wx_q_vector *q_vector);
void wx_configure_msix_vf(struct wx *wx);
int wx_write_uc_addr_list_vf(struct net_device *netdev);
void wx_setup_psrtype_vf(struct wx *wx);
diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c
index 7e2d9ec38a30..2ca127a7aa77 100644
--- a/drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c
+++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c
@@ -115,7 +115,8 @@ static int ngbe_set_channels(struct net_device *dev,
static const struct ethtool_ops ngbe_ethtool_ops = {
.supported_coalesce_params = ETHTOOL_COALESCE_USECS |
- ETHTOOL_COALESCE_TX_MAX_FRAMES_IRQ,
+ ETHTOOL_COALESCE_TX_MAX_FRAMES_IRQ |
+ ETHTOOL_COALESCE_USE_ADAPTIVE_RX,
.get_drvinfo = wx_get_drvinfo,
.get_link = ethtool_op_get_link,
.get_link_ksettings = wx_get_link_ksettings,
diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_ethtool.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_ethtool.c
index a4753402660e..86f3c106f1ed 100644
--- a/drivers/net/ethernet/wangxun/txgbe/txgbe_ethtool.c
+++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_ethtool.c
@@ -538,7 +538,8 @@ static int txgbe_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
static const struct ethtool_ops txgbe_ethtool_ops = {
.supported_coalesce_params = ETHTOOL_COALESCE_USECS |
- ETHTOOL_COALESCE_TX_MAX_FRAMES_IRQ,
+ ETHTOOL_COALESCE_TX_MAX_FRAMES_IRQ |
+ ETHTOOL_COALESCE_USE_ADAPTIVE_RX,
.get_drvinfo = wx_get_drvinfo,
.nway_reset = wx_nway_reset,
.get_link = ethtool_op_get_link,
--
2.48.1