Message-ID: <94cf16ed-709b-4cab-9325-52670db25902@intel.com>
Date: Tue, 11 Jun 2024 13:47:46 +0200
From: Alexander Lobakin <aleksander.lobakin@...el.com>
To: Mateusz Polchlopek <mateusz.polchlopek@...el.com>, Jacob Keller
<jacob.e.keller@...el.com>
CC: <intel-wired-lan@...ts.osuosl.org>, <netdev@...r.kernel.org>, "Wojciech
Drewek" <wojciech.drewek@...el.com>
Subject: Re: [Intel-wired-lan] [PATCH iwl-next v7 09/12] iavf: refactor
iavf_clean_rx_irq to support legacy and flex descriptors
From: Mateusz Polchlopek <mateusz.polchlopek@...el.com>
Date: Tue, 4 Jun 2024 09:13:57 -0400
> From: Jacob Keller <jacob.e.keller@...el.com>
>
> Using VIRTCHNL_VF_OFFLOAD_FLEX_DESC, the iAVF driver is capable of
> negotiating to enable the advanced flexible descriptor layout. Add the
> flexible NIC layout (RXDID=2) as a member of the Rx descriptor union.
>
> Also add bit position definitions for the status and error indications
> that are needed.
>
> The iavf_clean_rx_irq function needs to extract a few fields from the Rx
> descriptor, including the size, rx_ptype, and vlan_tag.
> Move the extraction to a separate function that decodes the fields into
> a structure. This will reduce the burden for handling multiple
> descriptor types by keeping the relevant extraction logic in one place.
>
> To support handling an additional descriptor format with minimal code
> duplication, refactor Rx checksum handling so that the general logic
> is separated from the bit calculations. Introduce an iavf_rx_desc_decoded
> structure which holds the relevant bits decoded from the Rx descriptor.
> This will enable implementing flexible descriptor handling without
> duplicating the general logic.
>
> Introduce the iavf_extract_flex_rx_fields, iavf_flex_rx_hash, and
> iavf_flex_rx_csum functions which operate on the flexible NIC descriptor
> format instead of the legacy 32-byte format. Based on the negotiated
> RXDID, select the correct function for processing the Rx descriptors.
>
> With this change, the Rx hot path should be functional when using either
> the default legacy 32-byte format or the flexible NIC layout.
>
> Modify the Rx hot path to add support for the flexible descriptor
> format and request enabling Rx timestamps for all queues.
>
> As in ice, make sure we bump the checksum level if the hardware detected
> a packet type which could have an outer checksum. This is important
> because hardware only verifies the inner checksum.
>
> Signed-off-by: Jacob Keller <jacob.e.keller@...el.com>
> Reviewed-by: Wojciech Drewek <wojciech.drewek@...el.com>
> Co-developed-by: Mateusz Polchlopek <mateusz.polchlopek@...el.com>
> Signed-off-by: Mateusz Polchlopek <mateusz.polchlopek@...el.com>
> ---
> drivers/net/ethernet/intel/iavf/iavf_txrx.c | 354 +++++++++++++-----
> drivers/net/ethernet/intel/iavf/iavf_txrx.h | 8 +
> drivers/net/ethernet/intel/iavf/iavf_type.h | 147 ++++++--
> .../net/ethernet/intel/iavf/iavf_virtchnl.c | 5 +
> 4 files changed, 391 insertions(+), 123 deletions(-)
>
> diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.c b/drivers/net/ethernet/intel/iavf/iavf_txrx.c
> index 26b424fd6718..97da5af52ad7 100644
> --- a/drivers/net/ethernet/intel/iavf/iavf_txrx.c
> +++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.c
> @@ -895,63 +895,68 @@ bool iavf_alloc_rx_buffers(struct iavf_ring *rx_ring, u16 cleaned_count)
> return true;
> }
>
> +/* iavf_rx_csum_decoded
> + *
> + * Checksum offload bits decoded from the receive descriptor.
> + */
> +struct iavf_rx_csum_decoded {
> + u8 l3l4p : 1;
> + u8 ipe : 1;
> + u8 eipe : 1;
> + u8 eudpe : 1;
> + u8 ipv6exadd : 1;
> + u8 l4e : 1;
> + u8 pprs : 1;
> + u8 nat : 1;
> +};
I see the same struct in idpf, probably a candidate for libeth.
> +
> /**
> - * iavf_rx_checksum - Indicate in skb if hw indicated a good cksum
> + * iavf_rx_csum - Indicate in skb if hw indicated a good checksum
> * @vsi: the VSI we care about
> * @skb: skb currently being received and modified
> - * @rx_desc: the receive descriptor
> + * @ptype: decoded ptype information
> + * @csum_bits: decoded Rx descriptor information
> **/
> -static void iavf_rx_checksum(struct iavf_vsi *vsi,
> - struct sk_buff *skb,
> - union iavf_rx_desc *rx_desc)
> +static void iavf_rx_csum(struct iavf_vsi *vsi, struct sk_buff *skb,
Can't @vsi be const?
> + struct libeth_rx_pt *ptype,
struct libeth_rx_pt is smaller than a pointer to it. Pass it directly.
> + struct iavf_rx_csum_decoded *csum_bits)
Same for this struct.
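I.e. something like this (untested sketch):

	static void iavf_rx_csum(const struct iavf_vsi *vsi,
				 struct sk_buff *skb,
				 struct libeth_rx_pt ptype,
				 struct iavf_rx_csum_decoded csum_bits)

Both structs are only a few bytes, so they fit into registers.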
> {
> - struct libeth_rx_pt decoded;
> - u32 rx_error, rx_status;
> bool ipv4, ipv6;
> - u8 ptype;
> - u64 qword;
>
> skb->ip_summed = CHECKSUM_NONE;
>
> - qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
> - ptype = FIELD_GET(IAVF_RXD_QW1_PTYPE_MASK, qword);
> -
> - decoded = libie_rx_pt_parse(ptype);
> - if (!libeth_rx_pt_has_checksum(vsi->netdev, decoded))
> - return;
> -
> - rx_error = FIELD_GET(IAVF_RXD_QW1_ERROR_MASK, qword);
> - rx_status = FIELD_GET(IAVF_RXD_QW1_STATUS_MASK, qword);
> -
> /* did the hardware decode the packet and checksum? */
> - if (!(rx_status & BIT(IAVF_RX_DESC_STATUS_L3L4P_SHIFT)))
> + if (!csum_bits->l3l4p)
> return;
>
> - ipv4 = libeth_rx_pt_get_ip_ver(decoded) == LIBETH_RX_PT_OUTER_IPV4;
> - ipv6 = libeth_rx_pt_get_ip_ver(decoded) == LIBETH_RX_PT_OUTER_IPV6;
> + ipv4 = libeth_rx_pt_get_ip_ver(*ptype) == LIBETH_RX_PT_OUTER_IPV4;
> + ipv6 = libeth_rx_pt_get_ip_ver(*ptype) == LIBETH_RX_PT_OUTER_IPV6;
>
> - if (ipv4 &&
> - (rx_error & (BIT(IAVF_RX_DESC_ERROR_IPE_SHIFT) |
> - BIT(IAVF_RX_DESC_ERROR_EIPE_SHIFT))))
> + if (ipv4 && (csum_bits->ipe || csum_bits->eipe))
> goto checksum_fail;
>
> /* likely incorrect csum if alternate IP extension headers found */
> - if (ipv6 &&
> - rx_status & BIT(IAVF_RX_DESC_STATUS_IPV6EXADD_SHIFT))
> - /* don't increment checksum err here, non-fatal err */
> + if (ipv6 && csum_bits->ipv6exadd)
> return;
>
> /* there was some L4 error, count error and punt packet to the stack */
> - if (rx_error & BIT(IAVF_RX_DESC_ERROR_L4E_SHIFT))
> + if (csum_bits->l4e)
> goto checksum_fail;
>
> /* handle packets that were not able to be checksummed due
> * to arrival speed, in this case the stack can compute
> * the csum.
> */
> - if (rx_error & BIT(IAVF_RX_DESC_ERROR_PPRS_SHIFT))
> + if (csum_bits->pprs)
> return;
>
> + /* If there is an outer header present that might contain a checksum
> + * we need to bump the checksum level by 1 to reflect the fact that
> + * we are indicating we validated the inner checksum.
> + */
> + if (ptype->tunnel_type >= LIBETH_RX_PT_TUNNEL_IP_GRENAT)
> + skb->csum_level = 1;
> +
> skb->ip_summed = CHECKSUM_UNNECESSARY;
> return;
For the whole function: you need to use unlikely() for checksum errors
to avoid slowing down regular frames.
Also, I would even unlikely() packets with an unverified checksum, as
it's a really rare case.
Optimize the hotpath for the most common workloads.
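E.g. (sketch):

	if (unlikely(ipv4 && (csum_bits.ipe || csum_bits.eipe)))
		goto checksum_fail;

	if (unlikely(csum_bits.l4e))
		goto checksum_fail;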
>
> @@ -960,22 +965,105 @@ static void iavf_rx_checksum(struct iavf_vsi *vsi,
> }
>
> /**
> - * iavf_rx_hash - set the hash value in the skb
> + * iavf_legacy_rx_csum - Indicate in skb if hw indicated a good cksum
> + * @vsi: the VSI we care about
> + * @skb: skb currently being received and modified
> + * @rx_desc: the receive descriptor
> + *
> + * This function only operates on the VIRTCHNL_RXDID_1_32B_BASE legacy 32byte
> + * descriptor writeback format.
> + **/
> +static void iavf_legacy_rx_csum(struct iavf_vsi *vsi,
> + struct sk_buff *skb,
> + union iavf_rx_desc *rx_desc)
@vsi and @rx_desc can be const.
> +{
> + struct iavf_rx_csum_decoded csum_bits;
> + struct libeth_rx_pt decoded;
> +
> + u32 rx_error;
> + u64 qword;
> + u16 ptype;
> +
> + qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
> + ptype = FIELD_GET(IAVF_RXD_QW1_PTYPE_MASK, qword);
> + rx_error = FIELD_GET(IAVF_RXD_QW1_ERROR_MASK, qword);
You don't need @rx_error before libeth_rx_pt_has_checksum().
> + decoded = libie_rx_pt_parse(ptype);
> +
> + if (!libeth_rx_pt_has_checksum(vsi->netdev, decoded))
> + return;
> +
> + csum_bits.ipe = FIELD_GET(IAVF_RX_DESC_ERROR_IPE_MASK, rx_error);
So, @rx_error is a field of @qword and then there are more subfields?
Why not extract those fields directly from @qword?
> + csum_bits.eipe = FIELD_GET(IAVF_RX_DESC_ERROR_EIPE_MASK, rx_error);
> + csum_bits.l4e = FIELD_GET(IAVF_RX_DESC_ERROR_L4E_MASK, rx_error);
> + csum_bits.pprs = FIELD_GET(IAVF_RX_DESC_ERROR_PPRS_MASK, rx_error);
> + csum_bits.l3l4p = FIELD_GET(IAVF_RX_DESC_STATUS_L3L4P_MASK, rx_error);
> + csum_bits.ipv6exadd = FIELD_GET(IAVF_RX_DESC_STATUS_IPV6EXADD_MASK,
> + rx_error);
> + csum_bits.nat = 0;
> + csum_bits.eudpe = 0;
Initialize the whole struct with = { } at the declaration site and
remove this.
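I.e.:

	struct iavf_rx_csum_decoded csum_bits = { };

and then the explicit zeroing of ::nat and ::eudpe is not needed.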
> +
> + iavf_rx_csum(vsi, skb, &decoded, &csum_bits);
In order to avoid having 2 call sites for this, make
iavf_{flex,legacy}_rx_csum() return @csum_bits and call iavf_rx_csum()
outside of them once.
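Rough sketch (untested, names approximate):

	if (rx_ring->rxdid == VIRTCHNL_RXDID_1_32B_BASE)
		csum_bits = iavf_legacy_rx_csum(rx_desc);
	else
		csum_bits = iavf_flex_rx_csum(rx_desc);

	iavf_rx_csum(vsi, skb, decoded, csum_bits);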
> +}
> +
> +/**
> + * iavf_flex_rx_csum - Indicate in skb if hw indicated a good cksum
> + * @vsi: the VSI we care about
> + * @skb: skb currently being received and modified
> + * @rx_desc: the receive descriptor
> + *
> + * This function only operates on the VIRTCHNL_RXDID_2_FLEX_SQ_NIC flexible
> + * descriptor writeback format.
> + **/
> +static void iavf_flex_rx_csum(struct iavf_vsi *vsi, struct sk_buff *skb,
> + union iavf_rx_desc *rx_desc)
Same for const.
> +{
> + struct iavf_rx_csum_decoded csum_bits;
> + struct libeth_rx_pt decoded;
> + u16 rx_status0, ptype;
> +
> + rx_status0 = le16_to_cpu(rx_desc->flex_wb.status_error0);
This is not needed before libeth_rx_pt_has_checksum().
> + ptype = FIELD_GET(IAVF_RX_FLEX_DESC_PTYPE_M,
> + le16_to_cpu(rx_desc->flex_wb.ptype_flexi_flags0));
You also access this field later when extracting base fields. Shouldn't
this be combined somehow?
> + decoded = libie_rx_pt_parse(ptype);
> +
> + if (!libeth_rx_pt_has_checksum(vsi->netdev, decoded))
> + return;
> +
> + csum_bits.ipe = FIELD_GET(IAVF_RX_FLEX_DESC_STATUS0_XSUM_IPE_M,
> + rx_status0);
> + csum_bits.eipe = FIELD_GET(IAVF_RX_FLEX_DESC_STATUS0_XSUM_EIPE_M,
> + rx_status0);
> + csum_bits.l4e = FIELD_GET(IAVF_RX_FLEX_DESC_STATUS0_XSUM_L4E_M,
> + rx_status0);
> + csum_bits.eudpe = FIELD_GET(IAVF_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_M,
> + rx_status0);
> + csum_bits.l3l4p = FIELD_GET(IAVF_RX_FLEX_DESC_STATUS0_L3L4P_M,
> + rx_status0);
> + csum_bits.ipv6exadd = FIELD_GET(IAVF_RX_FLEX_DESC_STATUS0_IPV6EXADD_M,
> + rx_status0);
> + csum_bits.nat = FIELD_GET(IAVF_RX_FLEX_DESC_STATUS1_NAT_M, rx_status0);
> + csum_bits.pprs = 0;
See above for struct initialization.
> +
> + iavf_rx_csum(vsi, skb, &decoded, &csum_bits);
See above.
> +}
> +
> +/**
> + * iavf_legacy_rx_hash - set the hash value in the skb
> * @ring: descriptor ring
> * @rx_desc: specific descriptor
> * @skb: skb currently being received and modified
> * @rx_ptype: Rx packet type
> + *
> + * This function only operates on the VIRTCHNL_RXDID_1_32B_BASE legacy 32byte
> + * descriptor writeback format.
> **/
> -static void iavf_rx_hash(struct iavf_ring *ring,
> - union iavf_rx_desc *rx_desc,
> - struct sk_buff *skb,
> - u8 rx_ptype)
> +static void iavf_legacy_rx_hash(struct iavf_ring *ring,
> + union iavf_rx_desc *rx_desc,
Const for both.
> + struct sk_buff *skb, u8 rx_ptype)
> {
> + const __le64 rss_mask = cpu_to_le64(IAVF_RX_DESC_STATUS_FLTSTAT_MASK);
> struct libeth_rx_pt decoded;
> u32 hash;
> - const __le64 rss_mask =
> - cpu_to_le64((u64)IAVF_RX_DESC_FLTSTAT_RSS_HASH <<
> - IAVF_RX_DESC_STATUS_FLTSTAT_SHIFT);
Looks unrelated, but a nice change anyway.
>
> decoded = libie_rx_pt_parse(rx_ptype);
> if (!libeth_rx_pt_has_hash(ring->netdev, decoded))
> @@ -987,6 +1075,38 @@ static void iavf_rx_hash(struct iavf_ring *ring,
> }
> }
>
> +/**
> + * iavf_flex_rx_hash - set the hash value in the skb
> + * @ring: descriptor ring
> + * @rx_desc: specific descriptor
> + * @skb: skb currently being received and modified
> + * @rx_ptype: Rx packet type
> + *
> + * This function only operates on the VIRTCHNL_RXDID_2_FLEX_SQ_NIC flexible
> + * descriptor writeback format.
> + **/
> +static void iavf_flex_rx_hash(struct iavf_ring *ring,
> + union iavf_rx_desc *rx_desc,
Const.
> + struct sk_buff *skb, u16 rx_ptype)
Why is @rx_ptype u16 here, but u8 above? Just use u32 for both.
> +{
> + struct libeth_rx_pt decoded;
> + u16 status0;
> + u32 hash;
> +
> + if (!(ring->netdev->features & NETIF_F_RXHASH))
This is checked in libeth_rx_pt_has_hash(), why check it twice?
> + return;
> +
> + decoded = libie_rx_pt_parse(rx_ptype);
> + if (!libeth_rx_pt_has_hash(ring->netdev, decoded))
> + return;
> +
> + status0 = le16_to_cpu(rx_desc->flex_wb.status_error0);
> + if (status0 & IAVF_RX_FLEX_DESC_STATUS0_RSS_VALID_M) {
> + hash = le32_to_cpu(rx_desc->flex_wb.rss_hash);
> + libeth_rx_pt_set_hash(skb, hash, decoded);
> + }
> +}
Also, just parse rx_ptype once in process_skb_fields(); you don't need
to do that in each function.
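E.g. (sketch, assuming the per-format helpers are converted to take the
already decoded ptype):

	struct libeth_rx_pt decoded = libie_rx_pt_parse(rx_ptype);

	iavf_flex_rx_hash(rx_ring, rx_desc, skb, decoded);
	iavf_flex_rx_csum(rx_ring->vsi, skb, rx_desc, decoded);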
> +
> /**
> * iavf_process_skb_fields - Populate skb header fields from Rx descriptor
> * @rx_ring: rx descriptor ring packet is being transacted on
> @@ -998,14 +1118,17 @@ static void iavf_rx_hash(struct iavf_ring *ring,
> * order to populate the hash, checksum, VLAN, protocol, and
> * other fields within the skb.
> **/
> -static void
> -iavf_process_skb_fields(struct iavf_ring *rx_ring,
> - union iavf_rx_desc *rx_desc, struct sk_buff *skb,
> - u8 rx_ptype)
> +static void iavf_process_skb_fields(struct iavf_ring *rx_ring,
> + union iavf_rx_desc *rx_desc,
Const.
> + struct sk_buff *skb, u16 rx_ptype)
> {
> - iavf_rx_hash(rx_ring, rx_desc, skb, rx_ptype);
> -
> - iavf_rx_checksum(rx_ring->vsi, skb, rx_desc);
> + if (rx_ring->rxdid == VIRTCHNL_RXDID_1_32B_BASE) {
Any likely/unlikely() here? Or is it 50:50?
> + iavf_legacy_rx_hash(rx_ring, rx_desc, skb, rx_ptype);
> + iavf_legacy_rx_csum(rx_ring->vsi, skb, rx_desc);
> + } else {
> + iavf_flex_rx_hash(rx_ring, rx_desc, skb, rx_ptype);
> + iavf_flex_rx_csum(rx_ring->vsi, skb, rx_desc);
> + }
>
> skb_record_rx_queue(skb, rx_ring->queue_index);
>
> @@ -1092,7 +1215,7 @@ static struct sk_buff *iavf_build_skb(const struct libeth_fqe *rx_buffer,
> /**
> * iavf_is_non_eop - process handling of non-EOP buffers
> * @rx_ring: Rx ring being processed
> - * @rx_desc: Rx descriptor for current buffer
> + * @fields: Rx descriptor extracted fields
> * @skb: Current socket buffer containing buffer in progress
> *
> * This function updates next to clean. If the buffer is an EOP buffer
> @@ -1101,7 +1224,7 @@ static struct sk_buff *iavf_build_skb(const struct libeth_fqe *rx_buffer,
> * that this is in fact a non-EOP buffer.
> **/
> static bool iavf_is_non_eop(struct iavf_ring *rx_ring,
> - union iavf_rx_desc *rx_desc,
> + struct iavf_rx_extracted *fields,
Pass value instead of pointer.
> struct sk_buff *skb)
Is it still needed?
> {
> u32 ntc = rx_ring->next_to_clean + 1;
> @@ -1113,8 +1236,7 @@ static bool iavf_is_non_eop(struct iavf_ring *rx_ring,
> prefetch(IAVF_RX_DESC(rx_ring, ntc));
>
> /* if we are the last buffer then there is nothing else to do */
> -#define IAVF_RXD_EOF BIT(IAVF_RX_DESC_STATUS_EOF_SHIFT)
> - if (likely(iavf_test_staterr(rx_desc, IAVF_RXD_EOF)))
> + if (likely(fields->end_of_packet))
> return false;
>
> rx_ring->rx_stats.non_eop_descs++;
> @@ -1122,6 +1244,91 @@ static bool iavf_is_non_eop(struct iavf_ring *rx_ring,
> return true;
> }
>
> +/**
> + * iavf_extract_legacy_rx_fields - Extract fields from the Rx descriptor
> + * @rx_ring: rx descriptor ring
> + * @rx_desc: the descriptor to process
> + * @fields: storage for extracted values
> + *
> + * Decode the Rx descriptor and extract relevant information including the
> + * size, VLAN tag, Rx packet type, end of packet field and RXE field value.
> + *
> + * This function only operates on the VIRTCHNL_RXDID_1_32B_BASE legacy 32byte
> + * descriptor writeback format.
> + */
> +static void iavf_extract_legacy_rx_fields(struct iavf_ring *rx_ring,
> + union iavf_rx_desc *rx_desc,
Consts.
> + struct iavf_rx_extracted *fields)
Return a struct &iavf_rx_extracted instead of passing a pointer to it.
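I.e. (untested):

	static struct iavf_rx_extracted
	iavf_extract_legacy_rx_fields(const struct iavf_ring *rx_ring,
				      const union iavf_rx_desc *rx_desc)
	{
		struct iavf_rx_extracted fields = { };

		/* fill @fields as before */

		return fields;
	}

&iavf_rx_extracted is small enough to be returned via registers.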
> +{
> + u64 qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
> +
> + fields->size = FIELD_GET(IAVF_RXD_QW1_LENGTH_PBUF_MASK, qword);
> + fields->rx_ptype = FIELD_GET(IAVF_RXD_QW1_PTYPE_MASK, qword);
> +
> + if (qword & IAVF_RX_DESC_STATUS_L2TAG1P_MASK &&
> + rx_ring->flags & IAVF_TXRX_FLAGS_VLAN_TAG_LOC_L2TAG1)
> + fields->vlan_tag = le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1);
> +
> + if (rx_desc->wb.qword2.ext_status &
> + cpu_to_le16(BIT(IAVF_RX_DESC_EXT_STATUS_L2TAG2P_SHIFT)) &&
Bitops must have their own pairs of parentheses.
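I.e.:

	if ((rx_desc->wb.qword2.ext_status &
	     cpu_to_le16(BIT(IAVF_RX_DESC_EXT_STATUS_L2TAG2P_SHIFT))) &&
	    (rx_ring->flags & IAVF_RXR_FLAGS_VLAN_TAG_LOC_L2TAG2_2))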
> + rx_ring->flags & IAVF_RXR_FLAGS_VLAN_TAG_LOC_L2TAG2_2)
> + fields->vlan_tag = le16_to_cpu(rx_desc->wb.qword2.l2tag2_2);
> +
> + fields->end_of_packet = FIELD_GET(IAVF_RX_DESC_STATUS_EOF_MASK, qword);
> + fields->rxe = FIELD_GET(BIT(IAVF_RXD_QW1_ERROR_SHIFT), qword);
> +}
> +
> +/**
> + * iavf_extract_flex_rx_fields - Extract fields from the Rx descriptor
> + * @rx_ring: rx descriptor ring
> + * @rx_desc: the descriptor to process
> + * @fields: storage for extracted values
> + *
> + * Decode the Rx descriptor and extract relevant information including the
> + * size, VLAN tag, Rx packet type, end of packet field and RXE field value.
> + *
> + * This function only operates on the VIRTCHNL_RXDID_2_FLEX_SQ_NIC flexible
> + * descriptor writeback format.
> + */
> +static void iavf_extract_flex_rx_fields(struct iavf_ring *rx_ring,
> + union iavf_rx_desc *rx_desc,
> + struct iavf_rx_extracted *fields)
Same for everything.
> +{
> + u16 status0, status1, flexi_flags0;
> +
> + fields->size = FIELD_GET(IAVF_RX_FLEX_DESC_PKT_LEN_M,
> + le16_to_cpu(rx_desc->flex_wb.pkt_len));
le16_get_bits().
> +
> + flexi_flags0 = le16_to_cpu(rx_desc->flex_wb.ptype_flexi_flags0);
> +
> + fields->rx_ptype = FIELD_GET(IAVF_RX_FLEX_DESC_PTYPE_M, flexi_flags0);
le16_get_bits() instead of these two?
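I.e. (untested):

	fields->size = le16_get_bits(rx_desc->flex_wb.pkt_len,
				     IAVF_RX_FLEX_DESC_PKT_LEN_M);
	fields->rx_ptype = le16_get_bits(rx_desc->flex_wb.ptype_flexi_flags0,
					 IAVF_RX_FLEX_DESC_PTYPE_M);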
> +
> + status0 = le16_to_cpu(rx_desc->flex_wb.status_error0);
> + if (status0 & IAVF_RX_FLEX_DESC_STATUS0_L2TAG1P_M &&
> + rx_ring->flags & IAVF_TXRX_FLAGS_VLAN_TAG_LOC_L2TAG1)
Parentheses for bitops.
> + fields->vlan_tag = le16_to_cpu(rx_desc->flex_wb.l2tag1);
> +
> + status1 = le16_to_cpu(rx_desc->flex_wb.status_error1);
> + if (status1 & IAVF_RX_FLEX_DESC_STATUS1_L2TAG2P_M &&
> + rx_ring->flags & IAVF_RXR_FLAGS_VLAN_TAG_LOC_L2TAG2_2)
> + fields->vlan_tag = le16_to_cpu(rx_desc->flex_wb.l2tag2_2nd);
(same)
> +
> + fields->end_of_packet = FIELD_GET(IAVF_RX_FLEX_DESC_STATUS_ERR0_EOP_BIT,
> + status0);
> + fields->rxe = FIELD_GET(IAVF_RX_FLEX_DESC_STATUS_ERR0_RXE_BIT,
> + status0);
> +}
> +
> +static void iavf_extract_rx_fields(struct iavf_ring *rx_ring,
> + union iavf_rx_desc *rx_desc,
> + struct iavf_rx_extracted *fields)
Consts + return struct @fields directly.
> +{
> + if (rx_ring->rxdid == VIRTCHNL_RXDID_1_32B_BASE)
You check this several times; it could be combined and optimized.
> + iavf_extract_legacy_rx_fields(rx_ring, rx_desc, fields);
> + else
> + iavf_extract_flex_rx_fields(rx_ring, rx_desc, fields);
> +}
> +
> /**
> * iavf_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf
> * @rx_ring: rx descriptor ring to transact packets on
> @@ -1142,12 +1349,9 @@ static int iavf_clean_rx_irq(struct iavf_ring *rx_ring, int budget)
> bool failure = false;
>
> while (likely(total_rx_packets < (unsigned int)budget)) {
> + struct iavf_rx_extracted fields = {};
> struct libeth_fqe *rx_buffer;
> union iavf_rx_desc *rx_desc;
> - unsigned int size;
> - u16 vlan_tag = 0;
> - u8 rx_ptype;
> - u64 qword;
>
> /* return some buffers to hardware, one at a time is too slow */
> if (cleaned_count >= IAVF_RX_BUFFER_WRITE) {
> @@ -1158,35 +1362,27 @@ static int iavf_clean_rx_irq(struct iavf_ring *rx_ring, int budget)
>
> rx_desc = IAVF_RX_DESC(rx_ring, rx_ring->next_to_clean);
>
> - /* status_error_len will always be zero for unused descriptors
> - * because it's cleared in cleanup, and overlaps with hdr_addr
> - * which is always zero because packet split isn't used, if the
> - * hardware wrote DD then the length will be non-zero
> - */
> - qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
> -
> /* This memory barrier is needed to keep us from reading
> * any other fields out of the rx_desc until we have
> * verified the descriptor has been written back.
> */
> dma_rmb();
> -#define IAVF_RXD_DD BIT(IAVF_RX_DESC_STATUS_DD_SHIFT)
> - if (!iavf_test_staterr(rx_desc, IAVF_RXD_DD))
> + if (!iavf_test_staterr(rx_desc, IAVF_RX_DESC_STATUS_DD_MASK))
> break;
>
> - size = FIELD_GET(IAVF_RXD_QW1_LENGTH_PBUF_MASK, qword);
> + iavf_extract_rx_fields(rx_ring, rx_desc, &fields);
>
> iavf_trace(clean_rx_irq, rx_ring, rx_desc, skb);
>
> rx_buffer = &rx_ring->rx_fqes[rx_ring->next_to_clean];
> - if (!libeth_rx_sync_for_cpu(rx_buffer, size))
> + if (!libeth_rx_sync_for_cpu(rx_buffer, fields.size))
> goto skip_data;
>
> /* retrieve a buffer from the ring */
> if (skb)
> - iavf_add_rx_frag(skb, rx_buffer, size);
> + iavf_add_rx_frag(skb, rx_buffer, fields.size);
> else
> - skb = iavf_build_skb(rx_buffer, size);
> + skb = iavf_build_skb(rx_buffer, fields.size);
>
> /* exit if we failed to retrieve a buffer */
> if (!skb) {
> @@ -1197,15 +1393,14 @@ static int iavf_clean_rx_irq(struct iavf_ring *rx_ring, int budget)
> skip_data:
> cleaned_count++;
>
> - if (iavf_is_non_eop(rx_ring, rx_desc, skb) || unlikely(!skb))
> + if (iavf_is_non_eop(rx_ring, &fields, skb) || unlikely(!skb))
> continue;
>
> - /* ERR_MASK will only have valid bits if EOP set, and
> - * what we are doing here is actually checking
> - * IAVF_RX_DESC_ERROR_RXE_SHIFT, since it is the zeroth bit in
> - * the error field
> + /* RXE field in descriptor is an indication of the MAC errors
> + * (like CRC, alignment, oversize etc). If it is set then iavf
> + * should finish.
> */
> - if (unlikely(iavf_test_staterr(rx_desc, BIT(IAVF_RXD_QW1_ERROR_SHIFT)))) {
> + if (unlikely(fields.rxe)) {
> dev_kfree_skb_any(skb);
> skb = NULL;
> continue;
> @@ -1219,22 +1414,11 @@ static int iavf_clean_rx_irq(struct iavf_ring *rx_ring, int budget)
> /* probably a little skewed due to removing CRC */
> total_rx_bytes += skb->len;
>
> - qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
> - rx_ptype = FIELD_GET(IAVF_RXD_QW1_PTYPE_MASK, qword);
> -
> /* populate checksum, VLAN, and protocol */
> - iavf_process_skb_fields(rx_ring, rx_desc, skb, rx_ptype);
> -
> - if (qword & BIT(IAVF_RX_DESC_STATUS_L2TAG1P_SHIFT) &&
> - rx_ring->flags & IAVF_TXRX_FLAGS_VLAN_TAG_LOC_L2TAG1)
> - vlan_tag = le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1);
> - if (rx_desc->wb.qword2.ext_status &
> - cpu_to_le16(BIT(IAVF_RX_DESC_EXT_STATUS_L2TAG2P_SHIFT)) &&
> - rx_ring->flags & IAVF_RXR_FLAGS_VLAN_TAG_LOC_L2TAG2_2)
> - vlan_tag = le16_to_cpu(rx_desc->wb.qword2.l2tag2_2);
BTW, I'm wondering whether filling the whole @fields can be less
optimized than accessing descriptor fields one by one as it was done
here before.
I mean, in some cases you won't need all the fields from the extracted
struct, but they will be read and initialized anyway.
> + iavf_process_skb_fields(rx_ring, rx_desc, skb, fields.rx_ptype);
>
> iavf_trace(clean_rx_irq_rx, rx_ring, rx_desc, skb);
> - iavf_receive_skb(rx_ring, skb, vlan_tag);
> + iavf_receive_skb(rx_ring, skb, fields.vlan_tag);
> skb = NULL;
>
> /* update budget accounting */
> diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.h b/drivers/net/ethernet/intel/iavf/iavf_txrx.h
> index 17309d8625ac..3661cd57a068 100644
> --- a/drivers/net/ethernet/intel/iavf/iavf_txrx.h
> +++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.h
> @@ -99,6 +99,14 @@ static inline bool iavf_test_staterr(union iavf_rx_desc *rx_desc,
> cpu_to_le64(stat_err_bits));
> }
>
> +struct iavf_rx_extracted {
> + unsigned int size;
> + u16 vlan_tag;
> + u16 rx_ptype;
> + u8 end_of_packet:1;
> + u8 rxe:1;
> +};
Also something libeth-ish, but not sure about this one.
> +
> /* How many Rx Buffers do we bundle into one write to the hardware ? */
> #define IAVF_RX_INCREMENT(r, i) \
> do { \
> diff --git a/drivers/net/ethernet/intel/iavf/iavf_type.h b/drivers/net/ethernet/intel/iavf/iavf_type.h
> index f6b09e57abce..82c16a720807 100644
> --- a/drivers/net/ethernet/intel/iavf/iavf_type.h
> +++ b/drivers/net/ethernet/intel/iavf/iavf_type.h
> @@ -206,6 +206,45 @@ union iavf_16byte_rx_desc {
> } wb; /* writeback */
> };
>
> +/* Rx Flex Descriptor NIC Profile
> + * RxDID Profile ID 2
> + * Flex-field 0: RSS hash lower 16-bits
> + * Flex-field 1: RSS hash upper 16-bits
> + * Flex-field 2: Flow ID lower 16-bits
> + * Flex-field 3: Flow ID higher 16-bits
> + * Flex-field 4: reserved, VLAN ID taken from L2Tag
> + */
> +struct iavf_32byte_rx_flex_wb {
> + /* Qword 0 */
> + u8 rxdid;
> + u8 mir_id_umb_cast;
> + __le16 ptype_flexi_flags0;
> + __le16 pkt_len;
> + __le16 hdr_len_sph_flex_flags1;
> +
> + /* Qword 1 */
> + __le16 status_error0;
> + __le16 l2tag1;
> + __le32 rss_hash;
> +
> + /* Qword 2 */
> + __le16 status_error1;
> + u8 flexi_flags2;
> + u8 ts_low;
> + __le16 l2tag2_1st;
> + __le16 l2tag2_2nd;
> +
> + /* Qword 3 */
> + __le32 flow_id;
> + union {
> + struct {
> + __le16 rsvd;
> + __le16 flow_id_ipv6;
> + } flex;
> + __le32 ts_high;
> + } flex_ts;
> +};
I'm wondering whether HW descriptors can be defined as just a bunch of
u64 qwords instead of all those u8/__le16 etc. fields. That would be
faster. If this works differently on BE and LE, use #ifdefs.
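Rough sketch of what I mean (completely untested; the qword-relative
mask below is made up for illustration):

	struct iavf_32byte_rx_flex_wb {
		__le64 qw0;
		__le64 qw1;
		__le64 qw2;
		__le64 qw3;
	};

	/* hypothetical mask: pkt_len lives in bits 45:32 of qword 0 */
	#define IAVF_RX_FLEX_QW0_PKT_LEN_M	GENMASK_ULL(45, 32)

	size = le64_get_bits(rx_desc->flex_wb.qw0,
			     IAVF_RX_FLEX_QW0_PKT_LEN_M);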
> +
> union iavf_32byte_rx_desc {
> struct {
> __le64 pkt_addr; /* Packet buffer address */
> @@ -253,35 +292,34 @@ union iavf_32byte_rx_desc {
> } hi_dword;
> } qword3;
> } wb; /* writeback */
> + struct iavf_32byte_rx_flex_wb flex_wb;
So, the already existing formats are defined inline here, but flex is
declared as a field of a separate struct? Can this be more consistent?
> };
>
> -enum iavf_rx_desc_status_bits {
> - /* Note: These are predefined bit offsets */
> - IAVF_RX_DESC_STATUS_DD_SHIFT = 0,
> - IAVF_RX_DESC_STATUS_EOF_SHIFT = 1,
> - IAVF_RX_DESC_STATUS_L2TAG1P_SHIFT = 2,
> - IAVF_RX_DESC_STATUS_L3L4P_SHIFT = 3,
> - IAVF_RX_DESC_STATUS_CRCP_SHIFT = 4,
> - IAVF_RX_DESC_STATUS_TSYNINDX_SHIFT = 5, /* 2 BITS */
> - IAVF_RX_DESC_STATUS_TSYNVALID_SHIFT = 7,
> - /* Note: Bit 8 is reserved in X710 and XL710 */
> - IAVF_RX_DESC_STATUS_EXT_UDP_0_SHIFT = 8,
> - IAVF_RX_DESC_STATUS_UMBCAST_SHIFT = 9, /* 2 BITS */
> - IAVF_RX_DESC_STATUS_FLM_SHIFT = 11,
> - IAVF_RX_DESC_STATUS_FLTSTAT_SHIFT = 12, /* 2 BITS */
> - IAVF_RX_DESC_STATUS_LPBK_SHIFT = 14,
> - IAVF_RX_DESC_STATUS_IPV6EXADD_SHIFT = 15,
> - IAVF_RX_DESC_STATUS_RESERVED_SHIFT = 16, /* 2 BITS */
> - /* Note: For non-tunnel packets INT_UDP_0 is the right status for
> - * UDP header
> - */
> - IAVF_RX_DESC_STATUS_INT_UDP_0_SHIFT = 18,
> - IAVF_RX_DESC_STATUS_LAST /* this entry must be last!!! */
> -};
> +/* Note: These are predefined bit offsets */
> +#define IAVF_RX_DESC_STATUS_DD_MASK BIT(0)
> +#define IAVF_RX_DESC_STATUS_EOF_MASK BIT(1)
> +#define IAVF_RX_DESC_STATUS_L2TAG1P_MASK BIT(2)
> +#define IAVF_RX_DESC_STATUS_L3L4P_MASK BIT(3)
> +#define IAVF_RX_DESC_STATUS_CRCP_MASK BIT(4)
> +#define IAVF_RX_DESC_STATUS_TSYNINDX_MASK GENMASK_ULL(6, 5)
> +#define IAVF_RX_DESC_STATUS_TSYNVALID_MASK BIT(7)
> +/* Note: Bit 8 is reserved in X710 and XL710 */
> +#define IAVF_RX_DESC_STATUS_EXT_UDP_0_MASK BIT(8)
> +#define IAVF_RX_DESC_STATUS_UMBCAST_MASK GENMASK_ULL(10, 9)
> +#define IAVF_RX_DESC_STATUS_FLM_MASK BIT(11)
> +#define IAVF_RX_DESC_STATUS_FLTSTAT_MASK GENMASK_ULL(13, 12)
> +#define IAVF_RX_DESC_STATUS_LPBK_MASK BIT(14)
> +#define IAVF_RX_DESC_STATUS_IPV6EXADD_MASK BIT(15)
> +#define IAVF_RX_DESC_STATUS_RESERVED_MASK GENMASK_ULL(17, 16)
> +/* Note: For non-tunnel packets INT_UDP_0 is the right status for
> + * UDP header
> + */
> +#define IAVF_RX_DESC_STATUS_INT_UDP_0_MASK BIT(18)
> +
> +#define IAVF_RX_FLEX_DESC_STATUS_ERR0_EOP_BIT BIT(1)
> +#define IAVF_RX_FLEX_DESC_STATUS_ERR0_RXE_BIT BIT(10)
>
> -#define IAVF_RXD_QW1_STATUS_SHIFT 0
> -#define IAVF_RXD_QW1_STATUS_MASK ((BIT(IAVF_RX_DESC_STATUS_LAST) - 1) \
> - << IAVF_RXD_QW1_STATUS_SHIFT)
> +#define IAVF_RXD_QW1_STATUS_MASK (BIT(19) - 1)
GENMASK().
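I.e.:

	#define IAVF_RXD_QW1_STATUS_MASK	GENMASK(18, 0)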
>
> #define IAVF_RXD_QW1_STATUS_TSYNINDX_SHIFT IAVF_RX_DESC_STATUS_TSYNINDX_SHIFT
> #define IAVF_RXD_QW1_STATUS_TSYNINDX_MASK (0x3UL << \
> @@ -301,18 +339,16 @@ enum iavf_rx_desc_fltstat_values {
> #define IAVF_RXD_QW1_ERROR_SHIFT 19
> #define IAVF_RXD_QW1_ERROR_MASK (0xFFUL << IAVF_RXD_QW1_ERROR_SHIFT)
>
> -enum iavf_rx_desc_error_bits {
> - /* Note: These are predefined bit offsets */
> - IAVF_RX_DESC_ERROR_RXE_SHIFT = 0,
> - IAVF_RX_DESC_ERROR_RECIPE_SHIFT = 1,
> - IAVF_RX_DESC_ERROR_HBO_SHIFT = 2,
> - IAVF_RX_DESC_ERROR_L3L4E_SHIFT = 3, /* 3 BITS */
> - IAVF_RX_DESC_ERROR_IPE_SHIFT = 3,
> - IAVF_RX_DESC_ERROR_L4E_SHIFT = 4,
> - IAVF_RX_DESC_ERROR_EIPE_SHIFT = 5,
> - IAVF_RX_DESC_ERROR_OVERSIZE_SHIFT = 6,
> - IAVF_RX_DESC_ERROR_PPRS_SHIFT = 7
> -};
> +/* Note: These are predefined bit offsets */
> +#define IAVF_RX_DESC_ERROR_RXE_MASK BIT(0)
> +#define IAVF_RX_DESC_ERROR_RECIPE_MASK BIT(1)
> +#define IAVF_RX_DESC_ERROR_HBO_MASK BIT(2)
> +#define IAVF_RX_DESC_ERROR_L3L4E_MASK GENMASK_ULL(5, 3)
> +#define IAVF_RX_DESC_ERROR_IPE_MASK BIT(3)
> +#define IAVF_RX_DESC_ERROR_L4E_MASK BIT(4)
> +#define IAVF_RX_DESC_ERROR_EIPE_MASK BIT(5)
> +#define IAVF_RX_DESC_ERROR_OVERSIZE_MASK BIT(6)
> +#define IAVF_RX_DESC_ERROR_PPRS_MASK BIT(7)
>
> enum iavf_rx_desc_error_l3l4e_fcoe_masks {
> IAVF_RX_DESC_ERROR_L3L4E_NONE = 0,
> @@ -325,6 +361,41 @@ enum iavf_rx_desc_error_l3l4e_fcoe_masks {
> #define IAVF_RXD_QW1_PTYPE_SHIFT 30
> #define IAVF_RXD_QW1_PTYPE_MASK (0xFFULL << IAVF_RXD_QW1_PTYPE_SHIFT)
>
> +/* for iavf_32byte_rx_flex_wb.ptype_flexi_flags0 member */
> +#define IAVF_RX_FLEX_DESC_PTYPE_M (0x3FF) /* 10-bits */
Redundant parentheses + GENMASK().
> +
> +/* for iavf_32byte_rx_flex_wb.pkt_length member */
> +#define IAVF_RX_FLEX_DESC_PKT_LEN_M (0x3FFF) /* 14-bits */
Same.
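I.e., for both:

	#define IAVF_RX_FLEX_DESC_PTYPE_M	GENMASK(9, 0)
	#define IAVF_RX_FLEX_DESC_PKT_LEN_M	GENMASK(13, 0)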
> +
> +/* Note: These are predefined bit offsets */
> +#define IAVF_RX_FLEX_DESC_STATUS0_DD_M BIT(0)
> +#define IAVF_RX_FLEX_DESC_STATUS0_EOF_M BIT(1)
> +#define IAVF_RX_FLEX_DESC_STATUS0_HBO_M BIT(2)
> +#define IAVF_RX_FLEX_DESC_STATUS0_L3L4P_M BIT(3)
> +#define IAVF_RX_FLEX_DESC_STATUS0_XSUM_IPE_M BIT(4)
> +#define IAVF_RX_FLEX_DESC_STATUS0_XSUM_L4E_M BIT(5)
> +#define IAVF_RX_FLEX_DESC_STATUS0_XSUM_EIPE_M BIT(6)
> +#define IAVF_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_M BIT(7)
> +#define IAVF_RX_FLEX_DESC_STATUS0_LPBK_M BIT(8)
> +#define IAVF_RX_FLEX_DESC_STATUS0_IPV6EXADD_M BIT(9)
> +#define IAVF_RX_FLEX_DESC_STATUS0_RXE_M BIT(10)
> +#define IAVF_RX_FLEX_DESC_STATUS0_CRCP_ BIT(11)
> +#define IAVF_RX_FLEX_DESC_STATUS0_RSS_VALID_M BIT(12)
> +#define IAVF_RX_FLEX_DESC_STATUS0_L2TAG1P_M BIT(13)
> +#define IAVF_RX_FLEX_DESC_STATUS0_XTRMD0_VALID_M BIT(14)
> +#define IAVF_RX_FLEX_DESC_STATUS0_XTRMD1_VALID_M BIT(15)
> +
> +/* Note: These are predefined bit offsets */
> +#define IAVF_RX_FLEX_DESC_STATUS1_CPM_M (0xFULL) /* 4 bits */
Redundant parentheses.
+ GENMASK_ULL(3, 0)?
> +#define IAVF_RX_FLEX_DESC_STATUS1_NAT_M BIT(4)
> +#define IAVF_RX_FLEX_DESC_STATUS1_CRYPTO_M BIT(5)
> +/* [10:6] reserved */
> +#define IAVF_RX_FLEX_DESC_STATUS1_L2TAG2P_M BIT(11)
> +#define IAVF_RX_FLEX_DESC_STATUS1_XTRMD2_VALID_M BIT(12)
> +#define IAVF_RX_FLEX_DESC_STATUS1_XTRMD3_VALID_M BIT(13)
> +#define IAVF_RX_FLEX_DESC_STATUS1_XTRMD4_VALID_M BIT(14)
> +#define IAVF_RX_FLEX_DESC_STATUS1_XTRMD5_VALID_M BIT(15)
> +
> #define IAVF_RXD_QW1_LENGTH_PBUF_SHIFT 38
> #define IAVF_RXD_QW1_LENGTH_PBUF_MASK (0x3FFFULL << \
> IAVF_RXD_QW1_LENGTH_PBUF_SHIFT)
> diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
> index 2693c3ad0830..5cbb375b7063 100644
> --- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
> +++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
> @@ -402,6 +402,7 @@ void iavf_configure_queues(struct iavf_adapter *adapter)
> int pairs = adapter->num_active_queues;
> struct virtchnl_queue_pair_info *vqpi;
> u32 i, max_frame;
> + u8 rx_flags = 0;
> size_t len;
>
> max_frame = LIBIE_MAX_RX_FRM_LEN(adapter->rx_rings->pp->p.offset);
> @@ -419,6 +420,9 @@ void iavf_configure_queues(struct iavf_adapter *adapter)
> if (!vqci)
> return;
>
> + if (iavf_ptp_cap_supported(adapter, VIRTCHNL_1588_PTP_CAP_RX_TSTAMP))
> + rx_flags |= VIRTCHNL_PTP_RX_TSTAMP;
> +
> vqci->vsi_id = adapter->vsi_res->vsi_id;
> vqci->num_queue_pairs = pairs;
> vqpi = vqci->qpair;
> @@ -441,6 +445,7 @@ void iavf_configure_queues(struct iavf_adapter *adapter)
> if (CRC_OFFLOAD_ALLOWED(adapter))
> vqpi->rxq.crc_disable = !!(adapter->netdev->features &
> NETIF_F_RXFCS);
> + vqpi->rxq.flags = rx_flags;
> vqpi++;
> }
Thanks,
Olek