[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <ZyztCTpa08BCXWpE@lizhi-Precision-Tower-5810>
Date: Thu, 7 Nov 2024 11:38:33 -0500
From: Frank Li <Frank.li@....com>
To: Wei Fang <wei.fang@....com>
Cc: claudiu.manoil@....com, vladimir.oltean@....com, xiaoning.wang@....com,
andrew+netdev@...n.ch, davem@...emloft.net, edumazet@...gle.com,
kuba@...nel.org, pabeni@...hat.com, netdev@...r.kernel.org,
linux-kernel@...r.kernel.org, imx@...ts.linux.dev
Subject: Re: [PATCH net-next 4/5] net: enetc: add LSO support for i.MX95
ENETC PF
On Thu, Nov 07, 2024 at 11:38:16AM +0800, Wei Fang wrote:
> ENETC rev 4.1 supports large send offload (LSO), segmenting large TCP
> and UDP transmit units into multiple Ethernet frames. To support LSO,
> software needs to fill some auxiliary information in Tx BD, such as LSO
> header length, frame length, LSO maximum segment size, etc.
>
> At 1Gbps link rate, TCP segmentation was tested using iperf3, and the
> CPU performance before and after applying the patch was compared through
> the top command. It can be seen that LSO saves a significant amount of
> CPU cycles compared to software TSO.
>
> Before applying the patch:
> %Cpu(s): 0.1 us, 4.1 sy, 0.0 ni, 85.7 id, 0.0 wa, 0.5 hi, 9.7 si
>
> After applying the patch:
> %Cpu(s): 0.1 us, 2.3 sy, 0.0 ni, 94.5 id, 0.0 wa, 0.4 hi, 2.6 si
>
> Signed-off-by: Wei Fang <wei.fang@....com>
Reviewed-by: Frank Li <Frank.Li@....com>
> ---
> drivers/net/ethernet/freescale/enetc/enetc.c | 266 +++++++++++++++++-
> drivers/net/ethernet/freescale/enetc/enetc.h | 15 +
> .../net/ethernet/freescale/enetc/enetc4_hw.h | 22 ++
> .../net/ethernet/freescale/enetc/enetc_hw.h | 15 +-
> .../freescale/enetc/enetc_pf_common.c | 3 +
> 5 files changed, 311 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c
> index b294ca4c2885..5586bdc7ab59 100644
> --- a/drivers/net/ethernet/freescale/enetc/enetc.c
> +++ b/drivers/net/ethernet/freescale/enetc/enetc.c
> @@ -528,6 +528,233 @@ static void enetc_tso_complete_csum(struct enetc_bdr *tx_ring, struct tso_t *tso
> }
> }
>
> +static inline int enetc_lso_count_descs(const struct sk_buff *skb)
> +{
> + /* 4 BDs: 1 BD for LSO header + 1 BD for extended BD + 1 BD
> + * for linear area data but not include LSO header, namely
> + * skb_headlen(skb) - lso_hdr_len. And 1 BD for gap.
> + */
> + return skb_shinfo(skb)->nr_frags + 4;
> +}
> +
> +static int enetc_lso_get_hdr_len(const struct sk_buff *skb)
> +{
> + int hdr_len, tlen;
> +
> + tlen = skb_is_gso_tcp(skb) ? tcp_hdrlen(skb) : sizeof(struct udphdr);
> + hdr_len = skb_transport_offset(skb) + tlen;
> +
> + return hdr_len;
> +}
> +
> +static void enetc_lso_start(struct sk_buff *skb, struct enetc_lso_t *lso)
> +{
> + lso->lso_seg_size = skb_shinfo(skb)->gso_size;
> + lso->ipv6 = vlan_get_protocol(skb) == htons(ETH_P_IPV6);
> + lso->tcp = skb_is_gso_tcp(skb);
> + lso->l3_hdr_len = skb_network_header_len(skb);
> + lso->l3_start = skb_network_offset(skb);
> + lso->hdr_len = enetc_lso_get_hdr_len(skb);
> + lso->total_len = skb->len - lso->hdr_len;
> +}
> +
> +static void enetc_lso_map_hdr(struct enetc_bdr *tx_ring, struct sk_buff *skb,
> + int *i, struct enetc_lso_t *lso)
> +{
> + union enetc_tx_bd txbd_tmp, *txbd;
> + struct enetc_tx_swbd *tx_swbd;
> + u16 frm_len, frm_len_ext;
> + u8 flags, e_flags = 0;
> + dma_addr_t addr;
> + char *hdr;
> +
> + /* Get the fisrt BD of the LSO BDs chain */
> + txbd = ENETC_TXBD(*tx_ring, *i);
> + tx_swbd = &tx_ring->tx_swbd[*i];
> + prefetchw(txbd);
> +
> + /* Prepare LSO header: MAC + IP + TCP/UDP */
> + hdr = tx_ring->tso_headers + *i * TSO_HEADER_SIZE;
> + memcpy(hdr, skb->data, lso->hdr_len);
> + addr = tx_ring->tso_headers_dma + *i * TSO_HEADER_SIZE;
> +
> + frm_len = lso->total_len & 0xffff;
> + frm_len_ext = (lso->total_len >> 16) & 0xf;
> +
> + /* Set the flags of the first BD */
> + flags = ENETC_TXBD_FLAGS_EX | ENETC_TXBD_FLAGS_CSUM_LSO |
> + ENETC_TXBD_FLAGS_LSO | ENETC_TXBD_FLAGS_L4CS;
> +
> + enetc_clear_tx_bd(&txbd_tmp);
> + txbd_tmp.addr = cpu_to_le64(addr);
> + txbd_tmp.hdr_len = cpu_to_le16(lso->hdr_len);
> +
> + /* first BD needs frm_len and offload flags set */
> + txbd_tmp.frm_len = cpu_to_le16(frm_len);
> + txbd_tmp.flags = flags;
> +
> + if (lso->tcp)
> + txbd_tmp.l4t = ENETC_TXBD_L4T_TCP;
> + else
> + txbd_tmp.l4t = ENETC_TXBD_L4T_UDP;
> +
> + if (lso->ipv6)
> + txbd_tmp.l3t = 1;
> + else
> + txbd_tmp.ipcs = 1;
> +
> + /* l3_hdr_size in 32-bits (4 bytes) */
> + txbd_tmp.l3_hdr_size = lso->l3_hdr_len / 4;
> + txbd_tmp.l3_start = lso->l3_start;
> +
> + /* For the LSO header we do not set the dma address since
> + * we do not want it unmapped when we do cleanup. We still
> + * set len so that we count the bytes sent.
> + */
> + tx_swbd->len = lso->hdr_len;
> + tx_swbd->do_twostep_tstamp = false;
> + tx_swbd->check_wb = false;
> +
> + /* Actually write the header in the BD */
> + *txbd = txbd_tmp;
> +
> + /* Get the next BD, and the next BD is extended BD */
> + enetc_bdr_idx_inc(tx_ring, i);
> + txbd = ENETC_TXBD(*tx_ring, *i);
> + tx_swbd = &tx_ring->tx_swbd[*i];
> + prefetchw(txbd);
> +
> + enetc_clear_tx_bd(&txbd_tmp);
> + if (skb_vlan_tag_present(skb)) {
> + /* Setup the VLAN fields */
> + txbd_tmp.ext.vid = cpu_to_le16(skb_vlan_tag_get(skb));
> + txbd_tmp.ext.tpid = 0; /* < C-TAG */
> + e_flags = ENETC_TXBD_E_FLAGS_VLAN_INS;
> + }
> +
> + /* Write the BD */
> + txbd_tmp.ext.e_flags = e_flags;
> + txbd_tmp.ext.lso_sg_size = cpu_to_le16(lso->lso_seg_size);
> + txbd_tmp.ext.frm_len_ext = cpu_to_le16(frm_len_ext);
> + *txbd = txbd_tmp;
> +}
> +
> +static int enetc_lso_map_data(struct enetc_bdr *tx_ring, struct sk_buff *skb,
> + int *i, struct enetc_lso_t *lso, int *count)
> +{
> + union enetc_tx_bd txbd_tmp, *txbd = NULL;
> + struct enetc_tx_swbd *tx_swbd;
> + skb_frag_t *frag;
> + dma_addr_t dma;
> + u8 flags = 0;
> + int len, f;
> +
> + len = skb_headlen(skb) - lso->hdr_len;
> + if (len > 0) {
> + dma = dma_map_single(tx_ring->dev, skb->data + lso->hdr_len,
> + len, DMA_TO_DEVICE);
> + if (unlikely(dma_mapping_error(tx_ring->dev, dma))) {
> + netdev_err(tx_ring->ndev, "DMA map error\n");
> + goto dma_err;
> + }
> +
> + enetc_bdr_idx_inc(tx_ring, i);
> + txbd = ENETC_TXBD(*tx_ring, *i);
> + tx_swbd = &tx_ring->tx_swbd[*i];
> + prefetchw(txbd);
> + *count += 1;
> +
> + enetc_clear_tx_bd(&txbd_tmp);
> + txbd_tmp.addr = cpu_to_le64(dma);
> + txbd_tmp.buf_len = cpu_to_le16(len);
> +
> + tx_swbd->dma = dma;
> + tx_swbd->len = len;
> + tx_swbd->is_dma_page = 0;
> + tx_swbd->dir = DMA_TO_DEVICE;
> + }
> +
> + frag = &skb_shinfo(skb)->frags[0];
> + for (f = 0; f < skb_shinfo(skb)->nr_frags; f++, frag++) {
> + if (txbd)
> + *txbd = txbd_tmp;
> +
> + len = skb_frag_size(frag);
> + dma = skb_frag_dma_map(tx_ring->dev, frag, 0, len,
> + DMA_TO_DEVICE);
> + if (unlikely(dma_mapping_error(tx_ring->dev, dma))) {
> + netdev_err(tx_ring->ndev, "DMA map error\n");
> + goto dma_err;
> + }
> +
> + /* Get the next BD */
> + enetc_bdr_idx_inc(tx_ring, i);
> + txbd = ENETC_TXBD(*tx_ring, *i);
> + tx_swbd = &tx_ring->tx_swbd[*i];
> + prefetchw(txbd);
> + *count += 1;
> +
> + enetc_clear_tx_bd(&txbd_tmp);
> + txbd_tmp.addr = cpu_to_le64(dma);
> + txbd_tmp.buf_len = cpu_to_le16(len);
> +
> + tx_swbd->dma = dma;
> + tx_swbd->len = len;
> + tx_swbd->is_dma_page = 1;
> + tx_swbd->dir = DMA_TO_DEVICE;
> + }
> +
> + /* Last BD needs 'F' bit set */
> + flags |= ENETC_TXBD_FLAGS_F;
> + txbd_tmp.flags = flags;
> + *txbd = txbd_tmp;
> +
> + tx_swbd->is_eof = 1;
> + tx_swbd->skb = skb;
> +
> + return 0;
> +
> +dma_err:
> + return -ENOMEM;
> +}
> +
> +static int enetc_lso_hw_offload(struct enetc_bdr *tx_ring, struct sk_buff *skb)
> +{
> + struct enetc_tx_swbd *tx_swbd;
> + struct enetc_lso_t lso = {0};
> + int err, i, count = 0;
> +
> + /* Initialize the LSO handler */
> + enetc_lso_start(skb, &lso);
> + i = tx_ring->next_to_use;
> +
> + enetc_lso_map_hdr(tx_ring, skb, &i, &lso);
> + /* First BD and an extend BD */
> + count += 2;
> +
> + err = enetc_lso_map_data(tx_ring, skb, &i, &lso, &count);
> + if (err)
> + goto dma_err;
> +
> + /* Go to the next BD */
> + enetc_bdr_idx_inc(tx_ring, &i);
> + tx_ring->next_to_use = i;
> + enetc_update_tx_ring_tail(tx_ring);
> +
> + return count;
> +
> +dma_err:
> + do {
> + tx_swbd = &tx_ring->tx_swbd[i];
> + enetc_free_tx_frame(tx_ring, tx_swbd);
> + if (i == 0)
> + i = tx_ring->bd_count;
> + i--;
> + } while (count--);
> +
> + return 0;
> +}
> +
> static int enetc_map_tx_tso_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb)
> {
> struct enetc_ndev_priv *priv = netdev_priv(tx_ring->ndev);
> @@ -648,14 +875,26 @@ static netdev_tx_t enetc_start_xmit(struct sk_buff *skb,
> tx_ring = priv->tx_ring[skb->queue_mapping];
>
> if (skb_is_gso(skb)) {
> - if (enetc_bd_unused(tx_ring) < tso_count_descs(skb)) {
> - netif_stop_subqueue(ndev, tx_ring->index);
> - return NETDEV_TX_BUSY;
> - }
> + /* LSO data unit lengths of up to 256KB are supported */
> + if (priv->active_offloads & ENETC_F_LSO &&
> + (skb->len - enetc_lso_get_hdr_len(skb)) <=
> + ENETC_LSO_MAX_DATA_LEN) {
> + if (enetc_bd_unused(tx_ring) < enetc_lso_count_descs(skb)) {
> + netif_stop_subqueue(ndev, tx_ring->index);
> + return NETDEV_TX_BUSY;
> + }
>
> - enetc_lock_mdio();
> - count = enetc_map_tx_tso_buffs(tx_ring, skb);
> - enetc_unlock_mdio();
> + count = enetc_lso_hw_offload(tx_ring, skb);
> + } else {
> + if (enetc_bd_unused(tx_ring) < tso_count_descs(skb)) {
> + netif_stop_subqueue(ndev, tx_ring->index);
> + return NETDEV_TX_BUSY;
> + }
> +
> + enetc_lock_mdio();
> + count = enetc_map_tx_tso_buffs(tx_ring, skb);
> + enetc_unlock_mdio();
> + }
> } else {
> if (unlikely(skb_shinfo(skb)->nr_frags > priv->max_frags))
> if (unlikely(skb_linearize(skb)))
> @@ -1801,6 +2040,9 @@ void enetc_get_si_caps(struct enetc_si *si)
> si->num_rss = ENETC_SIRSSCAPR_GET_NUM_RSS(rss);
> }
>
> + if (val & ENETC_SIPCAPR0_LSO)
> + si->hw_features |= ENETC_SI_F_LSO;
> +
> if (val & ENETC_SIPCAPR0_QBV)
> si->hw_features |= ENETC_SI_F_QBV;
>
> @@ -2105,6 +2347,13 @@ static int enetc_setup_default_rss_table(struct enetc_si *si, int num_groups)
> return 0;
> }
>
> +static void enetc_set_lso_flags_mask(struct enetc_hw *hw)
> +{
> + enetc_wr(hw, ENETC4_SILSOSFMR0,
> + SILSOSFMR0_VAL_SET(TCP_NL_SEG_FLAGS_DMASK, TCP_NL_SEG_FLAGS_DMASK));
> + enetc_wr(hw, ENETC4_SILSOSFMR1, 0);
> +}
> +
> int enetc_configure_si(struct enetc_ndev_priv *priv)
> {
> struct enetc_si *si = priv->si;
> @@ -2118,6 +2367,9 @@ int enetc_configure_si(struct enetc_ndev_priv *priv)
> /* enable SI */
> enetc_wr(hw, ENETC_SIMR, ENETC_SIMR_EN);
>
> + if (si->hw_features & ENETC_SI_F_LSO)
> + enetc_set_lso_flags_mask(hw);
> +
> /* TODO: RSS support for i.MX95 will be supported later, and the
> * is_enetc_rev1() condition will be removed
> */
> diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h
> index a78af4f624e0..0a69f72fe8ec 100644
> --- a/drivers/net/ethernet/freescale/enetc/enetc.h
> +++ b/drivers/net/ethernet/freescale/enetc/enetc.h
> @@ -41,6 +41,19 @@ struct enetc_tx_swbd {
> u8 qbv_en:1;
> };
>
> +struct enetc_lso_t {
> + bool ipv6;
> + bool tcp;
> + u8 l3_hdr_len;
> + u8 hdr_len; /* LSO header length */
> + u8 l3_start;
> + u16 lso_seg_size;
> + int total_len; /* total data length, not include LSO header */
> +};
> +
> +#define ENETC_1KB_SIZE 1024
> +#define ENETC_LSO_MAX_DATA_LEN (256 * ENETC_1KB_SIZE)
> +
> #define ENETC_RX_MAXFRM_SIZE ENETC_MAC_MAXFRM_SIZE
> #define ENETC_RXB_TRUESIZE 2048 /* PAGE_SIZE >> 1 */
> #define ENETC_RXB_PAD NET_SKB_PAD /* add extra space if needed */
> @@ -238,6 +251,7 @@ enum enetc_errata {
> #define ENETC_SI_F_PSFP BIT(0)
> #define ENETC_SI_F_QBV BIT(1)
> #define ENETC_SI_F_QBU BIT(2)
> +#define ENETC_SI_F_LSO BIT(3)
>
> struct enetc_drvdata {
> u32 pmac_offset; /* Only valid for PSI which supports 802.1Qbu */
> @@ -353,6 +367,7 @@ enum enetc_active_offloads {
> ENETC_F_QBU = BIT(11),
> ENETC_F_RXCSUM = BIT(12),
> ENETC_F_TXCSUM = BIT(13),
> + ENETC_F_LSO = BIT(14),
> };
>
> enum enetc_flags_bit {
> diff --git a/drivers/net/ethernet/freescale/enetc/enetc4_hw.h b/drivers/net/ethernet/freescale/enetc/enetc4_hw.h
> index 26b220677448..cdde8e93a73c 100644
> --- a/drivers/net/ethernet/freescale/enetc/enetc4_hw.h
> +++ b/drivers/net/ethernet/freescale/enetc/enetc4_hw.h
> @@ -12,6 +12,28 @@
> #define NXP_ENETC_VENDOR_ID 0x1131
> #define NXP_ENETC_PF_DEV_ID 0xe101
>
> +/**********************Station interface registers************************/
> +/* Station interface LSO segmentation flag mask register 0/1 */
> +#define ENETC4_SILSOSFMR0 0x1300
> +#define SILSOSFMR0_TCP_MID_SEG GENMASK(27, 16)
> +#define SILSOSFMR0_TCP_1ST_SEG GENMASK(11, 0)
> +#define SILSOSFMR0_VAL_SET(first, mid) ((((mid) << 16) & SILSOSFMR0_TCP_MID_SEG) | \
> + ((first) & SILSOSFMR0_TCP_1ST_SEG))
> +
> +#define ENETC4_SILSOSFMR1 0x1304
> +#define SILSOSFMR1_TCP_LAST_SEG GENMASK(11, 0)
> +#define TCP_FLAGS_FIN BIT(0)
> +#define TCP_FLAGS_SYN BIT(1)
> +#define TCP_FLAGS_RST BIT(2)
> +#define TCP_FLAGS_PSH BIT(3)
> +#define TCP_FLAGS_ACK BIT(4)
> +#define TCP_FLAGS_URG BIT(5)
> +#define TCP_FLAGS_ECE BIT(6)
> +#define TCP_FLAGS_CWR BIT(7)
> +#define TCP_FLAGS_NS BIT(8)
> +/* According to tso_build_hdr(), clear all special flags for not last packet. */
> +#define TCP_NL_SEG_FLAGS_DMASK (TCP_FLAGS_FIN | TCP_FLAGS_RST | TCP_FLAGS_PSH)
> +
> /***************************ENETC port registers**************************/
> #define ENETC4_ECAPR0 0x0
> #define ECAPR0_RFS BIT(2)
> diff --git a/drivers/net/ethernet/freescale/enetc/enetc_hw.h b/drivers/net/ethernet/freescale/enetc/enetc_hw.h
> index 590b1412fadf..34a3e8f1496e 100644
> --- a/drivers/net/ethernet/freescale/enetc/enetc_hw.h
> +++ b/drivers/net/ethernet/freescale/enetc/enetc_hw.h
> @@ -28,6 +28,8 @@
> #define ENETC_SIPCAPR0_QBV BIT(4)
> #define ENETC_SIPCAPR0_QBU BIT(3)
> #define ENETC_SIPCAPR0_RFS BIT(2)
> +#define ENETC_SIPCAPR0_LSO BIT(1)
> +#define ENETC_SIPCAPR0_RSC BIT(0)
> #define ENETC_SIPCAPR1 0x24
> #define ENETC_SITGTGR 0x30
> #define ENETC_SIRBGCR 0x38
> @@ -554,7 +556,10 @@ static inline u64 _enetc_rd_reg64_wa(void __iomem *reg)
> union enetc_tx_bd {
> struct {
> __le64 addr;
> - __le16 buf_len;
> + union {
> + __le16 buf_len;
> + __le16 hdr_len; /* For LSO, ENETC 4.1 and later */
> + };
> __le16 frm_len;
> union {
> struct {
> @@ -574,13 +579,16 @@ union enetc_tx_bd {
> __le32 tstamp;
> __le16 tpid;
> __le16 vid;
> - u8 reserved[6];
> + __le16 lso_sg_size; /* For ENETC 4.1 and later */
> + __le16 frm_len_ext; /* For ENETC 4.1 and later */
> + u8 reserved[2];
> u8 e_flags;
> u8 flags;
> } ext; /* Tx BD extension */
> struct {
> __le32 tstamp;
> - u8 reserved[10];
> + u8 reserved[8];
> + __le16 lso_err_count; /* For ENETC 4.1 and later */
> u8 status;
> u8 flags;
> } wb; /* writeback descriptor */
> @@ -589,6 +597,7 @@ union enetc_tx_bd {
> enum enetc_txbd_flags {
> ENETC_TXBD_FLAGS_L4CS = BIT(0), /* For ENETC 4.1 and later */
> ENETC_TXBD_FLAGS_TSE = BIT(1),
> + ENETC_TXBD_FLAGS_LSO = BIT(1), /* For ENETC 4.1 and later */
> ENETC_TXBD_FLAGS_W = BIT(2),
> ENETC_TXBD_FLAGS_CSUM_LSO = BIT(3), /* For ENETC 4.1 and later */
> ENETC_TXBD_FLAGS_TXSTART = BIT(4),
> diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf_common.c b/drivers/net/ethernet/freescale/enetc/enetc_pf_common.c
> index 2c4c6af672e7..82a67356abe4 100644
> --- a/drivers/net/ethernet/freescale/enetc/enetc_pf_common.c
> +++ b/drivers/net/ethernet/freescale/enetc/enetc_pf_common.c
> @@ -126,6 +126,9 @@ void enetc_pf_netdev_setup(struct enetc_si *si, struct net_device *ndev,
> if (si->drvdata->tx_csum)
> priv->active_offloads |= ENETC_F_TXCSUM;
>
> + if (si->hw_features & ENETC_SI_F_LSO)
> + priv->active_offloads |= ENETC_F_LSO;
> +
> /* TODO: currently, i.MX95 ENETC driver does not support advanced features */
> if (!is_enetc_rev1(si)) {
> ndev->hw_features &= ~(NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_LOOPBACK);
> --
> 2.34.1
>
Powered by blists - more mailing lists