[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID:
<PAXPR04MB8510554C5C99B7F429EBB84988052@PAXPR04MB8510.eurprd04.prod.outlook.com>
Date: Wed, 18 Dec 2024 05:45:41 +0000
From: Wei Fang <wei.fang@....com>
To: Alexander Lobakin <aleksander.lobakin@...el.com>
CC: Claudiu Manoil <claudiu.manoil@....com>, Vladimir Oltean
<vladimir.oltean@....com>, Clark Wang <xiaoning.wang@....com>,
"andrew+netdev@...n.ch" <andrew+netdev@...n.ch>, "davem@...emloft.net"
<davem@...emloft.net>, "edumazet@...gle.com" <edumazet@...gle.com>,
"kuba@...nel.org" <kuba@...nel.org>, "pabeni@...hat.com" <pabeni@...hat.com>,
Frank Li <frank.li@....com>, "horms@...nel.org" <horms@...nel.org>,
"idosch@...sch.org" <idosch@...sch.org>, "netdev@...r.kernel.org"
<netdev@...r.kernel.org>, "linux-kernel@...r.kernel.org"
<linux-kernel@...r.kernel.org>, "imx@...ts.linux.dev" <imx@...ts.linux.dev>
Subject: RE: [PATCH v8 net-next 3/4] net: enetc: add LSO support for i.MX95
ENETC PF
> > > +static inline int enetc_lso_count_descs(const struct sk_buff *skb) {
> > > + /* 4 BDs: 1 BD for LSO header + 1 BD for extended BD + 1 BD
> > > + * for linear area data but not include LSO header, namely
> > > + * skb_headlen(skb) - lso_hdr_len. And 1 BD for gap.
> >
> > What if the head contains headers only and
> > `skb_headlen(skb) - lso_hdr_len` is 0?
> >
>
> enetc_lso_count_descs() is a simple helper and only used to calculate
> the number of BDs needed in the worst case, so that we can check
> whether there are enough BDs to accommodate the current frame.
> It has no significant impact on the case you mentioned.
>
> > > + */
> > > + return skb_shinfo(skb)->nr_frags + 4; }
> > > +
> > > +static int enetc_lso_get_hdr_len(const struct sk_buff *skb) {
> > > + int hdr_len, tlen;
> > > +
> > > + tlen = skb_is_gso_tcp(skb) ? tcp_hdrlen(skb) : sizeof(struct udphdr);
> > > + hdr_len = skb_transport_offset(skb) + tlen;
> > > +
> > > + return hdr_len;
> > > +}
> >
> > Are you sure the kernel doesn't have similar generic helpers?
>
> This function refers to tso_start() in tso.c. I have not found any other similar
> helper functions in kernel.
> >
> > > +
> > > +static void enetc_lso_start(struct sk_buff *skb, struct enetc_lso_t
> > > +*lso) {
> > > + lso->lso_seg_size = skb_shinfo(skb)->gso_size;
> > > + lso->ipv6 = enetc_skb_is_ipv6(skb);
> > > + lso->tcp = skb_is_gso_tcp(skb);
> > > + lso->l3_hdr_len = skb_network_header_len(skb);
> > > + lso->l3_start = skb_network_offset(skb);
> > > + lso->hdr_len = enetc_lso_get_hdr_len(skb);
> > > + lso->total_len = skb->len - lso->hdr_len; }
> > > +
> > > +static void enetc_lso_map_hdr(struct enetc_bdr *tx_ring, struct sk_buff
> > *skb,
> > > + int *i, struct enetc_lso_t *lso) {
> > > + union enetc_tx_bd txbd_tmp, *txbd;
> > > + struct enetc_tx_swbd *tx_swbd;
> > > + u16 frm_len, frm_len_ext;
> > > + u8 flags, e_flags = 0;
> > > + dma_addr_t addr;
> > > + char *hdr;
> > > +
> > > + /* Get the first BD of the LSO BDs chain */
> > > + txbd = ENETC_TXBD(*tx_ring, *i);
> > > + tx_swbd = &tx_ring->tx_swbd[*i];
> > > + prefetchw(txbd);
> >
> > Is this prefetchw() proven to give any benefit?
> >
>
> Just to keep the logic consistent with the current code. Existing code always
> uses prefetchw() before setting up txbd, and I see no reason for it to behave
> differently in different places. I also don't have a quick answer to what the
> benefits are. :(
>
> > > +
> > > + /* Prepare LSO header: MAC + IP + TCP/UDP */
> > > + hdr = tx_ring->tso_headers + *i * TSO_HEADER_SIZE;
> > > + memcpy(hdr, skb->data, lso->hdr_len);
> > > + addr = tx_ring->tso_headers_dma + *i * TSO_HEADER_SIZE;
> > > +
> > > + frm_len = lso->total_len & 0xffff;
> > > + frm_len_ext = (lso->total_len >> 16) & 0xf;
> >
> > Why are these magics just open-coded, even without any comment?
> > I have no idea what is going on here for example.
> >
> > Also, `& 0xffff` is lower_16_bits(), while `lso->total_len >> 16` is
> upper_16_bits().
>
> frm_len is the lower 16 bits of the frame length, frm_len_ext is the higher 4 bits
> of the frame length, I will add some comments or macros.
> >
> > > +
> > > + /* Set the flags of the first BD */
> > > + flags = ENETC_TXBD_FLAGS_EX | ENETC_TXBD_FLAGS_CSUM_LSO |
> > > + ENETC_TXBD_FLAGS_LSO | ENETC_TXBD_FLAGS_L4CS;
> > > +
> > > + enetc_clear_tx_bd(&txbd_tmp);
> > > + txbd_tmp.addr = cpu_to_le64(addr);
> > > + txbd_tmp.hdr_len = cpu_to_le16(lso->hdr_len);
> > > +
> > > + /* first BD needs frm_len and offload flags set */
> > > + txbd_tmp.frm_len = cpu_to_le16(frm_len);
> > > + txbd_tmp.flags = flags;
> > > +
> > > + txbd_tmp.l3_aux0 = FIELD_PREP(ENETC_TX_BD_L3_START,
> lso->l3_start);
> > > + /* l3_hdr_size in 32-bits (4 bytes) */
> > > + txbd_tmp.l3_aux1 = FIELD_PREP(ENETC_TX_BD_L3_HDR_LEN,
> > > + lso->l3_hdr_len / 4);
> > > + if (lso->ipv6)
> > > + txbd_tmp.l3_aux1 |= FIELD_PREP(ENETC_TX_BD_L3T, 1);
> > > + else
> > > + txbd_tmp.l3_aux0 |= FIELD_PREP(ENETC_TX_BD_IPCS, 1);
> >
> > Both these "fields" are single bits. You don't need FIELD_PREP() for single-bit
> > fields, just `|= ENETC_TX_BD_L3T` etc.
>
> Okay, thanks.
> >
> > > +
> > > + txbd_tmp.l4_aux = FIELD_PREP(ENETC_TX_BD_L4T, lso->tcp ?
> > > + ENETC_TXBD_L4T_TCP :
> ENETC_TXBD_L4T_UDP);
> > > +
> > > + /* For the LSO header we do not set the dma address since
> > > + * we do not want it unmapped when we do cleanup. We still
> > > + * set len so that we count the bytes sent.
> > > + */
> > > + tx_swbd->len = lso->hdr_len;
> > > + tx_swbd->do_twostep_tstamp = false;
> > > + tx_swbd->check_wb = false;
> > > +
> > > + /* Actually write the header in the BD */
> > > + *txbd = txbd_tmp;
> > > +
> > > + /* Get the next BD, and the next BD is extended BD */
> > > + enetc_bdr_idx_inc(tx_ring, i);
> > > + txbd = ENETC_TXBD(*tx_ring, *i);
> > > + tx_swbd = &tx_ring->tx_swbd[*i];
> > > + prefetchw(txbd);
> >
> > (same question as for the previous prefetchw())
> >
> > > +
> > > + enetc_clear_tx_bd(&txbd_tmp);
> > > + if (skb_vlan_tag_present(skb)) {
> > > + /* Setup the VLAN fields */
> > > + txbd_tmp.ext.vid = cpu_to_le16(skb_vlan_tag_get(skb));
> > > + txbd_tmp.ext.tpid = 0; /* < C-TAG */
> >
> > ???
> >
> > Maybe #define it somewhere, that 0 means CVLAN etc.?
>
> Okay, accept.
>
> >
> > > + e_flags = ENETC_TXBD_E_FLAGS_VLAN_INS;
> > > + }
> > > +
> > > + /* Write the BD */
> > > + txbd_tmp.ext.e_flags = e_flags;
> > > + txbd_tmp.ext.lso_sg_size = cpu_to_le16(lso->lso_seg_size);
> > > + txbd_tmp.ext.frm_len_ext = cpu_to_le16(frm_len_ext);
> > > + *txbd = txbd_tmp;
> > > +}
> > > +
> > > +static int enetc_lso_map_data(struct enetc_bdr *tx_ring, struct sk_buff
> *skb,
> > > + int *i, struct enetc_lso_t *lso, int *count) {
> > > + union enetc_tx_bd txbd_tmp, *txbd = NULL;
> > > + struct enetc_tx_swbd *tx_swbd;
> > > + skb_frag_t *frag;
> > > + dma_addr_t dma;
> > > + u8 flags = 0;
> > > + int len, f;
> > > +
> > > + len = skb_headlen(skb) - lso->hdr_len;
> > > + if (len > 0) {
> > > + dma = dma_map_single(tx_ring->dev, skb->data + lso->hdr_len,
> > > + len, DMA_TO_DEVICE);
> > > + if (unlikely(dma_mapping_error(tx_ring->dev, dma)))
> >
> > dma_mapping_error() already contains unlikely().
>
> Will remove 'unlikely'. Thanks.
>
> And I will remove all the 'unlikely' for dma_mapping_error() in the future.
>
> >
> > > + return -ENOMEM;
> > > +
> > > + enetc_bdr_idx_inc(tx_ring, i);
> > > + txbd = ENETC_TXBD(*tx_ring, *i);
> > > + tx_swbd = &tx_ring->tx_swbd[*i];
> > > + prefetchw(txbd);
> > > + *count += 1;
> > > +
> > > + enetc_clear_tx_bd(&txbd_tmp);
> > > + txbd_tmp.addr = cpu_to_le64(dma);
> > > + txbd_tmp.buf_len = cpu_to_le16(len);
> > > +
> > > + tx_swbd->dma = dma;
> > > + tx_swbd->len = len;
> > > + tx_swbd->is_dma_page = 0;
> > > + tx_swbd->dir = DMA_TO_DEVICE;
> > > + }
> > > +
> > > + frag = &skb_shinfo(skb)->frags[0];
> > > + for (f = 0; f < skb_shinfo(skb)->nr_frags; f++, frag++) {
> > > + if (txbd)
> > > + *txbd = txbd_tmp;
> > > +
> > > + len = skb_frag_size(frag);
> > > + dma = skb_frag_dma_map(tx_ring->dev, frag, 0, len,
> > > + DMA_TO_DEVICE);
> >
> > You now can use skb_frag_dma_map() with 2-4 arguments, so this can be
> > replaced to
> >
> > dma = skb_frag_dma_map(tx_ring->dev, frag);
>
>
> But my compiler complains the error:
> drivers/net/ethernet/freescale/enetc/enetc.c: In function
> ‘enetc_lso_map_data’:
> drivers/net/ethernet/freescale/enetc/enetc.c:684:23: error: too few
> arguments to function ‘skb_frag_dma_map’
> 684 | dma = skb_frag_dma_map(tx_ring->dev, frag);
> | ^~~~~~~~~~~~~~~~
>
My bad, I did not update my code base.
> >
> > > + if (unlikely(dma_mapping_error(tx_ring->dev, dma)))
> > > + return -ENOMEM;
> > > +
> > > + /* Get the next BD */
> > > + enetc_bdr_idx_inc(tx_ring, i);
> > > + txbd = ENETC_TXBD(*tx_ring, *i);
> > > + tx_swbd = &tx_ring->tx_swbd[*i];
> > > + prefetchw(txbd);
> > > + *count += 1;
> > > +
> > > + enetc_clear_tx_bd(&txbd_tmp);
> > > + txbd_tmp.addr = cpu_to_le64(dma);
> > > + txbd_tmp.buf_len = cpu_to_le16(len);
> > > +
> > > + tx_swbd->dma = dma;
> > > + tx_swbd->len = len;
> > > + tx_swbd->is_dma_page = 1;
> > > + tx_swbd->dir = DMA_TO_DEVICE;
> > > + }
> > > +
> > > + /* Last BD needs 'F' bit set */
> > > + flags |= ENETC_TXBD_FLAGS_F;
> > > + txbd_tmp.flags = flags;
> > > + *txbd = txbd_tmp;
> > > +
> > > + tx_swbd->is_eof = 1;
> > > + tx_swbd->skb = skb;
> > > +
> > > + return 0;
> > > +}
> >
> > [...]
> >
> > > @@ -2096,6 +2329,13 @@ static int enetc_setup_default_rss_table(struct
> > enetc_si *si, int num_groups)
> > > return 0;
> > > }
> > >
> > > +static void enetc_set_lso_flags_mask(struct enetc_hw *hw) {
> > > + enetc_wr(hw, ENETC4_SILSOSFMR0,
> > > + SILSOSFMR0_VAL_SET(TCP_NL_SEG_FLAGS_DMASK,
> > TCP_NL_SEG_FLAGS_DMASK));
> > > + enetc_wr(hw, ENETC4_SILSOSFMR1, 0);
> > > +}
> > > +
> > > int enetc_configure_si(struct enetc_ndev_priv *priv) {
> > > struct enetc_si *si = priv->si;
> > > @@ -2109,6 +2349,9 @@ int enetc_configure_si(struct enetc_ndev_priv
> > *priv)
> > > /* enable SI */
> > > enetc_wr(hw, ENETC_SIMR, ENETC_SIMR_EN);
> > >
> > > + if (si->hw_features & ENETC_SI_F_LSO)
> > > + enetc_set_lso_flags_mask(hw);
> > > +
> > > /* TODO: RSS support for i.MX95 will be supported later, and the
> > > * is_enetc_rev1() condition will be removed
> > > */
> > > diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h
> > > b/drivers/net/ethernet/freescale/enetc/enetc.h
> > > index 1e680f0f5123..6db6b3eee45c 100644
> > > --- a/drivers/net/ethernet/freescale/enetc/enetc.h
> > > +++ b/drivers/net/ethernet/freescale/enetc/enetc.h
> > > @@ -41,6 +41,19 @@ struct enetc_tx_swbd {
> > > u8 qbv_en:1;
> > > };
> > >
> > > +struct enetc_lso_t {
> > > + bool ipv6;
> > > + bool tcp;
> > > + u8 l3_hdr_len;
> > > + u8 hdr_len; /* LSO header length */
> > > + u8 l3_start;
> > > + u16 lso_seg_size;
> > > + int total_len; /* total data length, not include LSO header */
> > > +};
> > > +
> > > +#define ENETC_1KB_SIZE 1024
> >
> > SZ_1K
> >
> > > +#define ENETC_LSO_MAX_DATA_LEN (256 * ENETC_1KB_SIZE)
> >
> > SZ_256K
> >
> > > +
> > > #define ENETC_RX_MAXFRM_SIZE ENETC_MAC_MAXFRM_SIZE
> > > #define ENETC_RXB_TRUESIZE 2048 /* PAGE_SIZE >> 1 */
> > > #define ENETC_RXB_PAD NET_SKB_PAD /* add extra space if
> needed
> > */
> > > @@ -238,6 +251,7 @@ enum enetc_errata { #define ENETC_SI_F_PSFP
> > > BIT(0) #define ENETC_SI_F_QBV BIT(1) #define ENETC_SI_F_QBU
> > BIT(2)
> > > +#define ENETC_SI_F_LSO BIT(3)
> > >
> > > struct enetc_drvdata {
> > > u32 pmac_offset; /* Only valid for PSI which supports 802.1Qbu */
> @@
> > > -351,6 +365,7 @@ enum enetc_active_offloads {
> > > ENETC_F_QCI = BIT(10),
> > > ENETC_F_QBU = BIT(11),
> > > ENETC_F_TXCSUM = BIT(12),
> > > + ENETC_F_LSO = BIT(13),
> > > };
> > >
> > > enum enetc_flags_bit {
> > > diff --git a/drivers/net/ethernet/freescale/enetc/enetc4_hw.h
> > > b/drivers/net/ethernet/freescale/enetc/enetc4_hw.h
> > > index 26b220677448..cdde8e93a73c 100644
> > > --- a/drivers/net/ethernet/freescale/enetc/enetc4_hw.h
> > > +++ b/drivers/net/ethernet/freescale/enetc/enetc4_hw.h
> > > @@ -12,6 +12,28 @@
> > > #define NXP_ENETC_VENDOR_ID 0x1131
> > > #define NXP_ENETC_PF_DEV_ID 0xe101
> > >
> > > +/**********************Station interface
> > > +registers************************/
> > > +/* Station interface LSO segmentation flag mask register 0/1 */
> > > +#define ENETC4_SILSOSFMR0 0x1300
> > > +#define SILSOSFMR0_TCP_MID_SEG GENMASK(27, 16)
> > > +#define SILSOSFMR0_TCP_1ST_SEG GENMASK(11, 0)
> > > +#define SILSOSFMR0_VAL_SET(first, mid) ((((mid) << 16) &
> > SILSOSFMR0_TCP_MID_SEG) | \
> >
> > Why not FIELD_PREP()?
>
> Okay, accept.
>
> >
> > > + ((first) & SILSOSFMR0_TCP_1ST_SEG))
> > > +
> > > +#define ENETC4_SILSOSFMR1 0x1304
> > > +#define SILSOSFMR1_TCP_LAST_SEG GENMASK(11, 0)
> > > +#define TCP_FLAGS_FIN BIT(0)
> > > +#define TCP_FLAGS_SYN BIT(1)
> > > +#define TCP_FLAGS_RST BIT(2)
> > > +#define TCP_FLAGS_PSH BIT(3)
> > > +#define TCP_FLAGS_ACK BIT(4)
> > > +#define TCP_FLAGS_URG BIT(5)
> > > +#define TCP_FLAGS_ECE BIT(6)
> > > +#define TCP_FLAGS_CWR BIT(7)
> > > +#define TCP_FLAGS_NS BIT(8)
> >
> > Why are you open-coding these if they're present in uapi/linux/tcp.h?
>
> Okay, I will add 'ENETC' prefix.
> >
> > > +/* According to tso_build_hdr(), clear all special flags for not last
> > > +packet. */
> >
> > But this mask is used only to do a writel(), I don't see it anywhere clearing
> > anything...
>
> The hardware will help mask of TCP header flags, we just need to set
> flag mask register.
>
> >
> > > +#define TCP_NL_SEG_FLAGS_DMASK (TCP_FLAGS_FIN |
> > TCP_FLAGS_RST | TCP_FLAGS_PSH)
> > > +
> > > /***************************ENETC port
> > registers**************************/
> > > #define ENETC4_ECAPR0 0x0
> > > #define ECAPR0_RFS BIT(2)
> > Thanks,
> > Olek
Powered by blists - more mailing lists