Message-ID: <20260119160843.3854173-1-aleksander.lobakin@intel.com>
Date: Mon, 19 Jan 2026 17:08:43 +0100
From: Alexander Lobakin <aleksander.lobakin@...el.com>
To: intel-wired-lan@...ts.osuosl.org
Cc: Alexander Lobakin <aleksander.lobakin@...el.com>,
Tony Nguyen <anthony.l.nguyen@...el.com>,
Przemek Kitszel <przemyslaw.kitszel@...el.com>,
Andrew Lunn <andrew+netdev@...n.ch>,
"David S. Miller" <davem@...emloft.net>,
Eric Dumazet <edumazet@...gle.com>,
Jakub Kicinski <kuba@...nel.org>,
Paolo Abeni <pabeni@...hat.com>,
Simon Horman <horms@...nel.org>,
Jacob Keller <jacob.e.keller@...el.com>,
Aleksandr Loktionov <aleksandr.loktionov@...el.com>,
nxne.cnse.osdt.itp.upstreaming@...el.com,
netdev@...r.kernel.org,
linux-kernel@...r.kernel.org
Subject: [PATCH iwl-next] ice: reshuffle and group Rx and Tx queue fields by cachelines
Place the fields of ice_{rx,tx}_ring that are used in the same pieces
of hotpath code closer to each other and use
__cacheline_group_{begin,end}_aligned() to isolate the read-mostly,
read-write, and cold groups into separate cachelines, similarly to
idpf.
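
For reference, a minimal sketch of the pattern (the struct below is a
made-up example, not from this patch; the macros come from
<linux/cache.h>):

	struct example_ring {
		__cacheline_group_begin_aligned(read_mostly);
		void *desc;		/* set up once, only read in hotpath */
		u16 count;
		__cacheline_group_end_aligned(read_mostly);

		__cacheline_group_begin_aligned(read_write);
		u16 next_to_use;	/* bounces on every Rx/Tx cycle */
		u16 next_to_clean;
		__cacheline_group_end_aligned(read_write);

		__cacheline_group_begin_aligned(cold);
		struct rcu_head rcu;	/* slowpath/teardown only */
		__cacheline_group_end_aligned(cold);
	};

Each group starts and ends on a cacheline boundary, so the read-mostly
fields are not invalidated by writes to the read-write group.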
Suggested-by: Jacob Keller <jacob.e.keller@...el.com>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@...el.com>
Reviewed-by: Jacob Keller <jacob.e.keller@...el.com>
Signed-off-by: Alexander Lobakin <aleksander.lobakin@...el.com>
---
Applies cleanly to today's next-queue.
Testing hints:
No functional changes are introduced by this patch, so it is not
expected to break anything. If you want, you can test basic XDP
actions (PASS, DROP) and compare the performance before and after the
patch.
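To double-check the resulting struct layouts, pahole can be run on an
object built with debug info, e.g. (the object path is just an
example):

	pahole -C ice_rx_ring drivers/net/ethernet/intel/ice/ice.o

The zero-size markers emitted by the __cacheline_group_*() macros
appear as struct members, so the boundary of each group should be
visible in the output.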
---
drivers/net/ethernet/intel/ice/ice_txrx.h | 122 ++++++++++--------
drivers/net/ethernet/intel/ice/ice_ethtool.c | 1 -
drivers/net/ethernet/intel/ice/ice_txrx.c | 1 -
drivers/net/ethernet/intel/ice/ice_txrx_lib.c | 3 -
4 files changed, 70 insertions(+), 57 deletions(-)
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
index e3c682723107..557b5e656bb0 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.h
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
@@ -267,34 +267,49 @@ struct ice_tstamp_ring {
} ____cacheline_internodealigned_in_smp;
struct ice_rx_ring {
- /* CL1 - 1st cacheline starts here */
+ __cacheline_group_begin_aligned(read_mostly);
void *desc; /* Descriptor ring memory */
struct page_pool *pp;
struct net_device *netdev; /* netdev ring maps to */
- struct ice_vsi *vsi; /* Backreference to associated VSI */
struct ice_q_vector *q_vector; /* Backreference to associated vector */
u8 __iomem *tail;
- u16 q_index; /* Queue number of ring */
-
- u16 count; /* Number of descriptors */
- u16 reg_idx; /* HW register index of the ring */
- u16 next_to_alloc;
union {
struct libeth_fqe *rx_fqes;
struct xdp_buff **xdp_buf;
};
- /* CL2 - 2nd cacheline starts here */
- struct libeth_fqe *hdr_fqes;
+ u16 count; /* Number of descriptors */
+ u8 ptp_rx;
+
+ u8 flags;
+#define ICE_RX_FLAGS_CRC_STRIP_DIS BIT(2)
+#define ICE_RX_FLAGS_MULTIDEV BIT(3)
+#define ICE_RX_FLAGS_RING_GCS BIT(4)
+
+ u32 truesize;
+
struct page_pool *hdr_pp;
+ struct libeth_fqe *hdr_fqes;
+
+ struct bpf_prog *xdp_prog;
+ struct ice_tx_ring *xdp_ring;
+ struct xsk_buff_pool *xsk_pool;
+
+ /* stats structs */
+ struct ice_ring_stats *ring_stats;
+ struct ice_rx_ring *next; /* pointer to next ring in q_vector */
+ u32 hdr_truesize;
+
+ struct xdp_rxq_info xdp_rxq;
+ __cacheline_group_end_aligned(read_mostly);
+
+ __cacheline_group_begin_aligned(read_write);
union {
struct libeth_xdp_buff_stash xdp;
struct libeth_xdp_buff *xsk;
};
-
- /* CL3 - 3rd cacheline starts here */
union {
struct ice_pkt_ctx pkt_ctx;
struct {
@@ -302,75 +317,78 @@ struct ice_rx_ring {
__be16 vlan_proto;
};
};
- struct bpf_prog *xdp_prog;
/* used in interrupt processing */
u16 next_to_use;
u16 next_to_clean;
+ __cacheline_group_end_aligned(read_write);
- u32 hdr_truesize;
- u32 truesize;
-
- /* stats structs */
- struct ice_ring_stats *ring_stats;
-
+ __cacheline_group_begin_aligned(cold);
struct rcu_head rcu; /* to avoid race on free */
- /* CL4 - 4th cacheline starts here */
+ struct ice_vsi *vsi; /* Backreference to associated VSI */
struct ice_channel *ch;
- struct ice_tx_ring *xdp_ring;
- struct ice_rx_ring *next; /* pointer to next ring in q_vector */
- struct xsk_buff_pool *xsk_pool;
- u16 rx_hdr_len;
- u16 rx_buf_len;
+
dma_addr_t dma; /* physical address of ring */
+ u16 q_index; /* Queue number of ring */
+ u16 reg_idx; /* HW register index of the ring */
u8 dcb_tc; /* Traffic class of ring */
- u8 ptp_rx;
-#define ICE_RX_FLAGS_CRC_STRIP_DIS BIT(2)
-#define ICE_RX_FLAGS_MULTIDEV BIT(3)
-#define ICE_RX_FLAGS_RING_GCS BIT(4)
- u8 flags;
- /* CL5 - 5th cacheline starts here */
- struct xdp_rxq_info xdp_rxq;
+
+ u16 rx_hdr_len;
+ u16 rx_buf_len;
+ __cacheline_group_end_aligned(cold);
} ____cacheline_internodealigned_in_smp;
struct ice_tx_ring {
- /* CL1 - 1st cacheline starts here */
- struct ice_tx_ring *next; /* pointer to next ring in q_vector */
+ __cacheline_group_begin_aligned(read_mostly);
void *desc; /* Descriptor ring memory */
struct device *dev; /* Used for DMA mapping */
u8 __iomem *tail;
struct ice_tx_buf *tx_buf;
+
struct ice_q_vector *q_vector; /* Backreference to associated vector */
struct net_device *netdev; /* netdev ring maps to */
struct ice_vsi *vsi; /* Backreference to associated VSI */
- /* CL2 - 2nd cacheline starts here */
- dma_addr_t dma; /* physical address of ring */
- struct xsk_buff_pool *xsk_pool;
- u16 next_to_use;
- u16 next_to_clean;
- u16 q_handle; /* Queue handle per TC */
- u16 reg_idx; /* HW register index of the ring */
+
u16 count; /* Number of descriptors */
u16 q_index; /* Queue number of ring */
- u16 xdp_tx_active;
+
+ u8 flags;
+#define ICE_TX_FLAGS_RING_XDP BIT(0)
+#define ICE_TX_FLAGS_RING_VLAN_L2TAG1 BIT(1)
+#define ICE_TX_FLAGS_RING_VLAN_L2TAG2 BIT(2)
+#define ICE_TX_FLAGS_TXTIME BIT(3)
+
+ struct xsk_buff_pool *xsk_pool;
+
/* stats structs */
struct ice_ring_stats *ring_stats;
- /* CL3 - 3rd cacheline starts here */
+ struct ice_tx_ring *next; /* pointer to next ring in q_vector */
+
+ struct ice_tstamp_ring *tstamp_ring;
+ struct ice_ptp_tx *tx_tstamps;
+ __cacheline_group_end_aligned(read_mostly);
+
+ __cacheline_group_begin_aligned(read_write);
+ u16 next_to_use;
+ u16 next_to_clean;
+
+ u16 xdp_tx_active;
+ spinlock_t tx_lock;
+ __cacheline_group_end_aligned(read_write);
+
+ __cacheline_group_begin_aligned(cold);
struct rcu_head rcu; /* to avoid race on free */
DECLARE_BITMAP(xps_state, ICE_TX_NBITS); /* XPS Config State */
struct ice_channel *ch;
- struct ice_ptp_tx *tx_tstamps;
- spinlock_t tx_lock;
- u32 txq_teid; /* Added Tx queue TEID */
- /* CL4 - 4th cacheline starts here */
- struct ice_tstamp_ring *tstamp_ring;
-#define ICE_TX_FLAGS_RING_XDP BIT(0)
-#define ICE_TX_FLAGS_RING_VLAN_L2TAG1 BIT(1)
-#define ICE_TX_FLAGS_RING_VLAN_L2TAG2 BIT(2)
-#define ICE_TX_FLAGS_TXTIME BIT(3)
- u8 flags;
+
+ dma_addr_t dma; /* physical address of ring */
+ u16 q_handle; /* Queue handle per TC */
+ u16 reg_idx; /* HW register index of the ring */
u8 dcb_tc; /* Traffic class of ring */
+
u16 quanta_prof_id;
+ u32 txq_teid; /* Added Tx queue TEID */
+ __cacheline_group_end_aligned(cold);
} ____cacheline_internodealigned_in_smp;
static inline bool ice_ring_ch_enabled(struct ice_tx_ring *ring)
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index 8d8569d06119..4f79dc73a8ad 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -3388,7 +3388,6 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring,
*/
rx_rings[i].next_to_use = 0;
rx_rings[i].next_to_clean = 0;
- rx_rings[i].next_to_alloc = 0;
*vsi->rx_rings[i] = rx_rings[i];
}
kfree(rx_rings);
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index e8e1acbd5a7d..40d7252caee0 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -582,7 +582,6 @@ void ice_zero_rx_ring(struct ice_rx_ring *rx_ring)
PAGE_SIZE);
memset(rx_ring->desc, 0, size);
- rx_ring->next_to_alloc = 0;
rx_ring->next_to_clean = 0;
rx_ring->next_to_use = 0;
}
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
index f7006ce5104a..66d211aa0833 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
@@ -20,9 +20,6 @@ void ice_release_rx_desc(struct ice_rx_ring *rx_ring, u16 val)
rx_ring->next_to_use = val;
- /* update next to alloc since we have filled the ring */
- rx_ring->next_to_alloc = val;
-
/* QRX_TAIL will be updated with any tail value, but hardware ignores
* the lower 3 bits. This makes it so we only bump tail on meaningful
* boundaries. Also, this allows us to bump tail on intervals of 8 up to
--
2.52.0