[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <9ca4dad6-61bd-4d98-bcb2-f69be6696deb@intel.com>
Date: Thu, 22 Aug 2024 18:16:27 +0200
From: Alexander Lobakin <aleksander.lobakin@...el.com>
To: Wojciech Drewek <wojciech.drewek@...el.com>
CC: <netdev@...r.kernel.org>, <intel-wired-lan@...ts.osuosl.org>,
<horms@...nel.org>, <anthony.l.nguyen@...el.com>, <kuba@...nel.org>,
<alexandr.lobakin@...el.com>
Subject: Re: [PATCH iwl-next v10 10/14] iavf: define Rx descriptors as qwords
From: Wojciech Drewek <wojciech.drewek@...el.com>
Date: Wed, 21 Aug 2024 14:15:35 +0200
> From: Mateusz Polchlopek <mateusz.polchlopek@...el.com>
>
> The union iavf_32byte_rx_desc consists of two unnamed structs defined
> inside. One of them represents legacy 32 byte descriptor and second the
> 16 byte descriptor (extended to 32 byte). Each of them consists of
> bunch of unions, structs and __le fields that represent specific fields
> in descriptor.
[...]
> @@ -901,19 +901,18 @@ bool iavf_alloc_rx_buffers(struct iavf_ring *rx_ring, u16 cleaned_count)
> * @skb: skb currently being received and modified
> * @rx_desc: the receive descriptor
> **/
> -static void iavf_rx_checksum(struct iavf_vsi *vsi,
> - struct sk_buff *skb,
> - union iavf_rx_desc *rx_desc)
> +static void iavf_rx_checksum(struct iavf_vsi *vsi, struct sk_buff *skb,
> + struct iavf_rx_desc *rx_desc)
> {
> struct libeth_rx_pt decoded;
> u32 rx_error, rx_status;
> bool ipv4, ipv6;
> - u8 ptype;
> u64 qword;
> + u8 ptype;
Oops, please fix this where you're introducing this.
Also, I don't think it's a good idea to limit ptype to u8. ice supports
up to 1024 ptypes, while u8 is [0...255]. u16 would be fine (I suggested
using 14 bits for it in libeth_rqe_info). But on the stack, I'd just go
with u32.
Please correct all the places where it's u8.
>
> skb->ip_summed = CHECKSUM_NONE;
>
> - qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
> + qword = le64_to_cpu(rx_desc->qw1);
> ptype = FIELD_GET(IAVF_RXD_QW1_PTYPE_MASK, qword);
>
> decoded = libie_rx_pt_parse(ptype);
[...]
> @@ -1143,7 +1142,8 @@ static int iavf_clean_rx_irq(struct iavf_ring *rx_ring, int budget)
>
> while (likely(total_rx_packets < (unsigned int)budget)) {
> struct libeth_fqe *rx_buffer;
> - union iavf_rx_desc *rx_desc;
> + struct iavf_rx_desc *rx_desc;
> + u16 ext_status = 0;
> unsigned int size;
> u16 vlan_tag = 0;
> u8 rx_ptype;
> @@ -1163,7 +1163,7 @@ static int iavf_clean_rx_irq(struct iavf_ring *rx_ring, int budget)
> * which is always zero because packet split isn't used, if the
> * hardware wrote DD then the length will be non-zero
> */
> - qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
> + qword = le64_to_cpu(rx_desc->qw1);
>
> /* This memory barrier is needed to keep us from reading
> * any other fields out of the rx_desc until we have
> @@ -1219,7 +1219,7 @@ static int iavf_clean_rx_irq(struct iavf_ring *rx_ring, int budget)
> /* probably a little skewed due to removing CRC */
> total_rx_bytes += skb->len;
>
> - qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
> + qword = le64_to_cpu(rx_desc->qw1);
So here you have multiple reads of the same qword within this function.
I think instead of just 1 variable, you need to define several, like
u64 qw0, qw2, ...
then read them from the descriptor where you need them for the first
time, and then just use onstack value for the rest of the function.
> rx_ptype = FIELD_GET(IAVF_RXD_QW1_PTYPE_MASK, qword);
>
> /* populate checksum, VLAN, and protocol */
> @@ -1227,11 +1227,16 @@ static int iavf_clean_rx_irq(struct iavf_ring *rx_ring, int budget)
>
> if (qword & BIT(IAVF_RX_DESC_STATUS_L2TAG1P_SHIFT) &&
> rx_ring->flags & IAVF_TXRX_FLAGS_VLAN_TAG_LOC_L2TAG1)
> - vlan_tag = le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1);
> - if (rx_desc->wb.qword2.ext_status &
> - cpu_to_le16(BIT(IAVF_RX_DESC_EXT_STATUS_L2TAG2P_SHIFT)) &&
> - rx_ring->flags & IAVF_RXR_FLAGS_VLAN_TAG_LOC_L2TAG2_2)
> - vlan_tag = le16_to_cpu(rx_desc->wb.qword2.l2tag2_2);
> + vlan_tag = le64_get_bits(rx_desc->qw0,
> + IAVF_RXD_LEGACY_L2TAG1_M);
> +
> + ext_status = le64_get_bits(rx_desc->qw2,
> + IAVF_RXD_LEGACY_EXT_STATUS_M);
> +
> + if ((ext_status & IAVF_RX_DESC_EXT_STATUS_L2TAG2P_M) &&
> + (rx_ring->flags & IAVF_RXR_FLAGS_VLAN_TAG_LOC_L2TAG2_2))
This second check for rx_ring->flags will be the same for the whole NAPI
polling loop. You can define a boolean on the stack outside the loop,
check this bit once and then just use this boolean here (*before*
checking for ext_status), it will be faster.
> + vlan_tag = le64_get_bits(rx_desc->qw2,
> + IAVF_RXD_LEGACY_L2TAG2_2_M);
So here you would have
1 bitop for fetching ext_status
1 bitop for checking L2TAG2P_M
instead, just check the bit directly in qw2.
>
> iavf_trace(clean_rx_irq_rx, rx_ring, rx_desc, skb);
> iavf_receive_skb(rx_ring, skb, vlan_tag);
[...]
> +/**
> + * struct iavf_rx_desc - Receive descriptor (both legacy and flexible)
> + * @qw0: quad word 0
> + * @qw1: quad word 1
> + * @qw2: quad word 2
> + * @qw3: quad word 3
Some description for these qwords, which fields they contain? This kdoc
looks generic and explains nothing unfortunately.
> + */
> +struct iavf_rx_desc {
> + aligned_le64 qw0;
> +/* The hash signature (RSS). */
> +#define IAVF_RXD_LEGACY_RSS_M GENMASK_ULL(63, 32)
> +/* Stripped L2 Tag from the receive packet. */
One-line comments shouldn't have
> +#define IAVF_RXD_LEGACY_L2TAG1_M GENMASK_ULL(33, 16)
> +
> + aligned_le64 qw1;
> + aligned_le64 qw2;
> +/* Extracted second word of the L2 Tag 2. */
You can rewrite this comment to reflect the actual value of this
definition -- that it's a VLAN tag (for example, I don't know how this
differs from the VLAN tag field above, so I'd like to see the
explanation here).
> +#define IAVF_RXD_LEGACY_L2TAG2_2_M GENMASK_ULL(63, 48)
> +/* Extended status bits. */
> +#define IAVF_RXD_LEGACY_EXT_STATUS_M GENMASK_ULL(11, 0)
(so this definition won't be needed after you address the comment I
wrote under the place where you use it)
> +
> + aligned_le64 qw3;
> +} __aligned(4 * sizeof(__le64));
static_assert(sizeof(this struct) == <size of the descriptor from the HW
spec>) would be nice to see here.
>
> enum iavf_rx_desc_status_bits {
> /* Note: These are predefined bit offsets */
> @@ -347,6 +294,8 @@ enum iavf_rx_desc_ext_status_bits {
> IAVF_RX_DESC_EXT_STATUS_PELONGB_SHIFT = 11,
> };
>
> +#define IAVF_RX_DESC_EXT_STATUS_L2TAG2P_M BIT(IAVF_RX_DESC_EXT_STATUS_L2TAG2P_SHIFT)
> +
> enum iavf_rx_desc_pe_status_bits {
> /* Note: These are predefined bit offsets */
> IAVF_RX_DESC_PE_STATUS_QPID_SHIFT = 0, /* 18 BITS */
> @@ -574,4 +523,5 @@ struct iavf_eth_stats {
> u64 tx_discards; /* tdpc */
> u64 tx_errors; /* tepc */
> };
> +
Probably unwanted change sneaked somehow (I'm not against it, but the
maintainers could be :p)
> #endif /* _IAVF_TYPE_H_ */
Thanks,
Olek
Powered by blists - more mailing lists