Message-ID: <20241029145011.4obrgprcaksworlq@DEN-DL-M70577>
Date: Tue, 29 Oct 2024 14:50:11 +0000
From: Daniel Machon <daniel.machon@...rochip.com>
To: Jacob Keller <jacob.e.keller@...el.com>
CC: Vladimir Oltean <olteanv@...il.com>, Andrew Morton
<akpm@...ux-foundation.org>, Eric Dumazet <edumazet@...gle.com>, Jakub
Kicinski <kuba@...nel.org>, Paolo Abeni <pabeni@...hat.com>, Tony Nguyen
<anthony.l.nguyen@...el.com>, Przemek Kitszel <przemyslaw.kitszel@...el.com>,
Masahiro Yamada <masahiroy@...nel.org>, <linux-kernel@...r.kernel.org>,
<netdev@...r.kernel.org>
Subject: Re: [PATCH net-next v2 6/9] ice: use <linux/packing.h> for Tx and Rx
queue context data

Hi Jacob,

> The ice driver needs to write the Tx and Rx queue context when programming
> Tx and Rx queues. This is currently done using some bespoke custom logic
> via the ice_set_ctx() and its helper functions, along with bit position
> definitions in the ice_tlan_ctx_info and ice_rlan_ctx_info structures.
>
> This logic does work, but is problematic for several reasons:
>
> 1) ice_set_ctx requires a helper function for each byte size being packed,
> as it uses a separate function to pack u8, u16, u32, and u64 fields.
> This requires 4 functions which contain near-duplicate logic with the
> types changed out.
>
> 2) The logic in the ice_pack_ctx_word, ice_pack_ctx_dword, and
> ice_pack_ctx_qword does not handle values which straddle alignment
> boundaries very well. This requires that several fields in the
> ice_tlan_ctx_info and ice_rlan_ctx_info be a size larger than their bit
> size should require.
>
> 3) Future support for live migration will require adding unpacking
> functions to take the packed hardware context and unpack it into the
> ice_rlan_ctx and ice_tlan_ctx structures. Implementing this would
> require implementing ice_get_ctx, and its associated helper functions,
> which essentially doubles the amount of code required.
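Side note, to make point (2) above concrete for other readers: with the old
size-matched helpers, the shifted value has to fit in the member's own type,
so a field that straddles an alignment boundary forces the struct member to
be wider than its bit width needs. A made-up example (the field name and bit
position below are hypothetical, not taken from the driver):

struct old_style_ctx {
	/*
	 * A 14-bit field at hardware LSB 174: shift_width = 174 % 8 = 6,
	 * and 6 + 14 = 20 bits no longer fits in a u16 after the shift in
	 * ice_pack_ctx_word(), so the member has to be declared u32 so
	 * that sizeof_field() picks ice_pack_ctx_dword() instead.
	 */
	u32 example_field;
};

With pack_fields() the field is described purely by its bit width and
position, so this workaround goes away.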
>
> The Linux kernel has had a packing library that can handle this logic since
> commit 554aae35007e ("lib: Add support for generic packing operations").
> The library was recently extended with support for packing or unpacking an
> array of fields, with a structure similar to the ice_ctx_ele structure.
>
> Replace the ice-specific ice_set_ctx() logic with the recently added
> pack_fields and packed_field_s infrastructure from <linux/packing.h>.
>
> For API simplicity, the Tx and Rx queue contexts are programmed using
> separate ice_pack_txq_ctx() and ice_pack_rxq_ctx() functions. This avoids
> needing to export the packed_field_s arrays. The functions take pointers to the
> appropriate ice_txq_ctx_buf_t and ice_rxq_ctx_buf_t types, ensuring that
> only buffers of the appropriate size are passed.
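As a quick illustration for anyone who hasn't seen the new API yet, the usage
pattern is roughly the following. This is only a sketch: the struct, the field
layout, the 16-byte buffer and the foo_* names are made up, and the _3 check
variant (plus its PACKING_CHECK_FIELDS_3 Kconfig symbol, I assume) would be
needed just like the _20/_27 ones below.

#include <linux/packing.h>

struct foo_ctx {
	u64 base;
	u16 qlen;
	u8 ena;
};

/* PACKED_FIELD(msb, lsb, struct, member) describes one field of the
 * packed hardware layout.
 */
static const struct packed_field_s foo_ctx_fields[] = {
	/* Field					Width	LSB */
	PACKED_FIELD(56, 0, struct foo_ctx, base),	/* 57	0 */
	PACKED_FIELD(69, 57, struct foo_ctx, qlen),	/* 13	57 */
	PACKED_FIELD(70, 70, struct foo_ctx, ena),	/* 1	70 */
};

static void foo_pack_ctx(const struct foo_ctx *ctx, u8 (*buf)[16])
{
	/* Compile-time sanity check; the _3 variant matches the number
	 * of fields in the array.
	 */
	CHECK_PACKED_FIELDS_3(foo_ctx_fields, sizeof(*buf));

	pack_fields(buf, sizeof(*buf), ctx, foo_ctx_fields,
		    QUIRK_LITTLE_ENDIAN | QUIRK_LSW32_IS_FIRST);
}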
>
> Signed-off-by: Jacob Keller <jacob.e.keller@...el.com>
> ---
> drivers/net/ethernet/intel/ice/ice_common.h | 5 +-
> drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h | 14 --
> drivers/net/ethernet/intel/ice/ice_base.c | 3 +-
> drivers/net/ethernet/intel/ice/ice_common.c | 249 +++++--------------------
> drivers/net/ethernet/intel/Kconfig | 3 +
> 5 files changed, 50 insertions(+), 224 deletions(-)
>
> diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h
> index 27208a60cece..a68bea3934e3 100644
> --- a/drivers/net/ethernet/intel/ice/ice_common.h
> +++ b/drivers/net/ethernet/intel/ice/ice_common.h
> @@ -92,9 +92,8 @@ ice_aq_set_rss_key(struct ice_hw *hw, u16 vsi_handle,
> bool ice_check_sq_alive(struct ice_hw *hw, struct ice_ctl_q_info *cq);
> int ice_aq_q_shutdown(struct ice_hw *hw, bool unloading);
> void ice_fill_dflt_direct_cmd_desc(struct ice_aq_desc *desc, u16 opcode);
> -extern const struct ice_ctx_ele ice_tlan_ctx_info[];
> -int ice_set_ctx(struct ice_hw *hw, u8 *src_ctx, u8 *dest_ctx,
> - const struct ice_ctx_ele *ce_info);
> +
> +void ice_pack_txq_ctx(const struct ice_tlan_ctx *ctx, ice_txq_ctx_buf_t *buf);
>
> extern struct mutex ice_global_cfg_lock_sw;
>
> diff --git a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
> index a76e5b0e7861..31d4a445d640 100644
> --- a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
> +++ b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
> @@ -408,20 +408,6 @@ struct ice_rlan_ctx {
> u8 prefena; /* NOTE: normally must be set to 1 at init */
> };
>
> -struct ice_ctx_ele {
> - u16 offset;
> - u16 size_of;
> - u16 width;
> - u16 lsb;
> -};
> -
> -#define ICE_CTX_STORE(_struct, _ele, _width, _lsb) { \
> - .offset = offsetof(struct _struct, _ele), \
> - .size_of = sizeof_field(struct _struct, _ele), \
> - .width = _width, \
> - .lsb = _lsb, \
> -}
> -
> /* for hsplit_0 field of Rx RLAN context */
> enum ice_rlan_ctx_rx_hsplit_0 {
> ICE_RLAN_RX_HSPLIT_0_NO_SPLIT = 0,
> diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c
> index 260942877968..0a325dec804e 100644
> --- a/drivers/net/ethernet/intel/ice/ice_base.c
> +++ b/drivers/net/ethernet/intel/ice/ice_base.c
> @@ -909,8 +909,7 @@ ice_vsi_cfg_txq(struct ice_vsi *vsi, struct ice_tx_ring *ring,
> ice_setup_tx_ctx(ring, &tlan_ctx, pf_q);
> /* copy context contents into the qg_buf */
> qg_buf->txqs[0].txq_id = cpu_to_le16(pf_q);
> - ice_set_ctx(hw, (u8 *)&tlan_ctx, (u8 *)&qg_buf->txqs[0].txq_ctx,
> - ice_tlan_ctx_info);
> + ice_pack_txq_ctx(&tlan_ctx, &qg_buf->txqs[0].txq_ctx);
>
> /* init queue specific tail reg. It is referred as
> * transmit comm scheduler queue doorbell.
> diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
> index 48d95cb49864..905f5c745a7b 100644
> --- a/drivers/net/ethernet/intel/ice/ice_common.c
> +++ b/drivers/net/ethernet/intel/ice/ice_common.c
> @@ -6,6 +6,7 @@
> #include "ice_adminq_cmd.h"
> #include "ice_flow.h"
> #include "ice_ptp_hw.h"
> +#include <linux/packing.h>
>
> #define ICE_PF_RESET_WAIT_COUNT 300
> #define ICE_MAX_NETLIST_SIZE 10
> @@ -1385,9 +1386,12 @@ static int ice_copy_rxq_ctx_to_hw(struct ice_hw *hw,
> return 0;
> }
>
> +#define ICE_CTX_STORE(struct_name, struct_field, width, lsb) \
> + PACKED_FIELD((lsb) + (width) - 1, (lsb), struct struct_name, struct_field)
> +
> /* LAN Rx Queue Context */
> -static const struct ice_ctx_ele ice_rlan_ctx_info[] = {
> - /* Field Width LSB */
> +static const struct packed_field_s ice_rlan_ctx_fields[] = {
> + /* Field Width LSB */
> ICE_CTX_STORE(ice_rlan_ctx, head, 13, 0),
> ICE_CTX_STORE(ice_rlan_ctx, cpuid, 8, 13),
> ICE_CTX_STORE(ice_rlan_ctx, base, 57, 32),
> @@ -1408,9 +1412,26 @@ static const struct ice_ctx_ele ice_rlan_ctx_info[] = {
> ICE_CTX_STORE(ice_rlan_ctx, tphhead_ena, 1, 196),
> ICE_CTX_STORE(ice_rlan_ctx, lrxqthresh, 3, 198),
> ICE_CTX_STORE(ice_rlan_ctx, prefena, 1, 201),
> - { 0 }
> };
>
> +/**
> + * ice_pack_rxq_ctx - Pack Rx queue context into a HW buffer
> + * @ctx: the Rx queue context to pack
> + * @buf: the HW buffer to pack into
> + *
> + * Pack the Rx queue context from the CPU-friendly unpacked buffer into its
> + * bit-packed HW layout.
> + */
> +static void ice_pack_rxq_ctx(const struct ice_rlan_ctx *ctx,
> + ice_rxq_ctx_buf_t *buf)
> +{
> + CHECK_PACKED_FIELDS_20(ice_rlan_ctx_fields, ICE_RXQ_CTX_SZ);
> + BUILD_BUG_ON(sizeof(*buf) != ICE_RXQ_CTX_SZ);
> +
> + pack_fields(buf, sizeof(*buf), ctx, ice_rlan_ctx_fields,
> + QUIRK_LITTLE_ENDIAN | QUIRK_LSW32_IS_FIRST);
> +}
> +
FWIW, I noticed that smatch bails out when checking all of the
CHECK_PACKED_FIELDS_* variants >= 20, with the following warnings:
ice_common.c:1486 ice_pack_txq_ctx() parse error: OOM: 3000148Kb sm_state_count = 413556
ice_common.c:1486 ice_pack_txq_ctx() warn: Function too hairy. No more merges.
ice_common.c:1486 ice_pack_txq_ctx() parse error: Function too hairy. Giving up. 43 second
Maybe this can just be ignored .. not sure :-)
> /**
> * ice_write_rxq_ctx
> * @hw: pointer to the hardware structure
> @@ -1431,12 +1452,13 @@ int ice_write_rxq_ctx(struct ice_hw *hw, struct ice_rlan_ctx *rlan_ctx,
>
> rlan_ctx->prefena = 1;
>
> - ice_set_ctx(hw, (u8 *)rlan_ctx, (u8 *)&buf, ice_rlan_ctx_info);
> + ice_pack_rxq_ctx(rlan_ctx, &buf);
> +
> return ice_copy_rxq_ctx_to_hw(hw, &buf, rxq_index);
> }
>
> /* LAN Tx Queue Context */
> -const struct ice_ctx_ele ice_tlan_ctx_info[] = {
> +static const struct packed_field_s ice_tlan_ctx_fields[] = {
> /* Field Width LSB */
> ICE_CTX_STORE(ice_tlan_ctx, base, 57, 0),
> ICE_CTX_STORE(ice_tlan_ctx, port_num, 3, 57),
> @@ -1465,9 +1487,25 @@ const struct ice_ctx_ele ice_tlan_ctx_info[] = {
> ICE_CTX_STORE(ice_tlan_ctx, drop_ena, 1, 165),
> ICE_CTX_STORE(ice_tlan_ctx, cache_prof_idx, 2, 166),
> ICE_CTX_STORE(ice_tlan_ctx, pkt_shaper_prof_idx, 3, 168),
> - { 0 }
> };
>
> +/**
> + * ice_pack_txq_ctx - Pack Tx queue context into a HW buffer
> + * @ctx: the Tx queue context to pack
> + * @buf: the HW buffer to pack into
> + *
> + * Pack the Tx queue context from the CPU-friendly unpacked buffer into its
> + * bit-packed HW layout.
> + */
> +void ice_pack_txq_ctx(const struct ice_tlan_ctx *ctx, ice_txq_ctx_buf_t *buf)
> +{
> + CHECK_PACKED_FIELDS_27(ice_tlan_ctx_fields, ICE_TXQ_CTX_SZ);
> + BUILD_BUG_ON(sizeof(*buf) != ICE_TXQ_CTX_SZ);
> +
> + pack_fields(buf, sizeof(*buf), ctx, ice_tlan_ctx_fields,
> + QUIRK_LITTLE_ENDIAN | QUIRK_LSW32_IS_FIRST);
> +}
> +
Same here with the 27 variant.
> /* Sideband Queue command wrappers */
>
> /**
> @@ -4545,205 +4583,6 @@ ice_aq_add_rdma_qsets(struct ice_hw *hw, u8 num_qset_grps,
>
> /* End of FW Admin Queue command wrappers */
>
> -/**
> - * ice_pack_ctx_byte - write a byte to a packed context structure
> - * @src_ctx: unpacked source context structure
> - * @dest_ctx: packed destination context data
> - * @ce_info: context element description
> - */
> -static void ice_pack_ctx_byte(u8 *src_ctx, u8 *dest_ctx,
> - const struct ice_ctx_ele *ce_info)
> -{
> - u8 src_byte, dest_byte, mask;
> - u8 *from, *dest;
> - u16 shift_width;
> -
> - /* copy from the next struct field */
> - from = src_ctx + ce_info->offset;
> -
> - /* prepare the bits and mask */
> - shift_width = ce_info->lsb % 8;
> - mask = GENMASK(ce_info->width - 1 + shift_width, shift_width);
> -
> - src_byte = *from;
> - src_byte <<= shift_width;
> - src_byte &= mask;
> -
> - /* get the current bits from the target bit string */
> - dest = dest_ctx + (ce_info->lsb / 8);
> -
> - memcpy(&dest_byte, dest, sizeof(dest_byte));
> -
> - dest_byte &= ~mask; /* get the bits not changing */
> - dest_byte |= src_byte; /* add in the new bits */
> -
> - /* put it all back */
> - memcpy(dest, &dest_byte, sizeof(dest_byte));
> -}
> -
> -/**
> - * ice_pack_ctx_word - write a word to a packed context structure
> - * @src_ctx: unpacked source context structure
> - * @dest_ctx: packed destination context data
> - * @ce_info: context element description
> - */
> -static void ice_pack_ctx_word(u8 *src_ctx, u8 *dest_ctx,
> - const struct ice_ctx_ele *ce_info)
> -{
> - u16 src_word, mask;
> - __le16 dest_word;
> - u8 *from, *dest;
> - u16 shift_width;
> -
> - /* copy from the next struct field */
> - from = src_ctx + ce_info->offset;
> -
> - /* prepare the bits and mask */
> - shift_width = ce_info->lsb % 8;
> - mask = GENMASK(ce_info->width - 1 + shift_width, shift_width);
> -
> - /* don't swizzle the bits until after the mask because the mask bits
> - * will be in a different bit position on big endian machines
> - */
> - src_word = *(u16 *)from;
> - src_word <<= shift_width;
> - src_word &= mask;
> -
> - /* get the current bits from the target bit string */
> - dest = dest_ctx + (ce_info->lsb / 8);
> -
> - memcpy(&dest_word, dest, sizeof(dest_word));
> -
> - dest_word &= ~(cpu_to_le16(mask)); /* get the bits not changing */
> - dest_word |= cpu_to_le16(src_word); /* add in the new bits */
> -
> - /* put it all back */
> - memcpy(dest, &dest_word, sizeof(dest_word));
> -}
> -
> -/**
> - * ice_pack_ctx_dword - write a dword to a packed context structure
> - * @src_ctx: unpacked source context structure
> - * @dest_ctx: packed destination context data
> - * @ce_info: context element description
> - */
> -static void ice_pack_ctx_dword(u8 *src_ctx, u8 *dest_ctx,
> - const struct ice_ctx_ele *ce_info)
> -{
> - u32 src_dword, mask;
> - __le32 dest_dword;
> - u8 *from, *dest;
> - u16 shift_width;
> -
> - /* copy from the next struct field */
> - from = src_ctx + ce_info->offset;
> -
> - /* prepare the bits and mask */
> - shift_width = ce_info->lsb % 8;
> - mask = GENMASK(ce_info->width - 1 + shift_width, shift_width);
> -
> - /* don't swizzle the bits until after the mask because the mask bits
> - * will be in a different bit position on big endian machines
> - */
> - src_dword = *(u32 *)from;
> - src_dword <<= shift_width;
> - src_dword &= mask;
> -
> - /* get the current bits from the target bit string */
> - dest = dest_ctx + (ce_info->lsb / 8);
> -
> - memcpy(&dest_dword, dest, sizeof(dest_dword));
> -
> - dest_dword &= ~(cpu_to_le32(mask)); /* get the bits not changing */
> - dest_dword |= cpu_to_le32(src_dword); /* add in the new bits */
> -
> - /* put it all back */
> - memcpy(dest, &dest_dword, sizeof(dest_dword));
> -}
> -
> -/**
> - * ice_pack_ctx_qword - write a qword to a packed context structure
> - * @src_ctx: unpacked source context structure
> - * @dest_ctx: packed destination context data
> - * @ce_info: context element description
> - */
> -static void ice_pack_ctx_qword(u8 *src_ctx, u8 *dest_ctx,
> - const struct ice_ctx_ele *ce_info)
> -{
> - u64 src_qword, mask;
> - __le64 dest_qword;
> - u8 *from, *dest;
> - u16 shift_width;
> -
> - /* copy from the next struct field */
> - from = src_ctx + ce_info->offset;
> -
> - /* prepare the bits and mask */
> - shift_width = ce_info->lsb % 8;
> - mask = GENMASK_ULL(ce_info->width - 1 + shift_width, shift_width);
> -
> - /* don't swizzle the bits until after the mask because the mask bits
> - * will be in a different bit position on big endian machines
> - */
> - src_qword = *(u64 *)from;
> - src_qword <<= shift_width;
> - src_qword &= mask;
> -
> - /* get the current bits from the target bit string */
> - dest = dest_ctx + (ce_info->lsb / 8);
> -
> - memcpy(&dest_qword, dest, sizeof(dest_qword));
> -
> - dest_qword &= ~(cpu_to_le64(mask)); /* get the bits not changing */
> - dest_qword |= cpu_to_le64(src_qword); /* add in the new bits */
> -
> - /* put it all back */
> - memcpy(dest, &dest_qword, sizeof(dest_qword));
> -}
> -
> -/**
> - * ice_set_ctx - set context bits in packed structure
> - * @hw: pointer to the hardware structure
> - * @src_ctx: pointer to a generic non-packed context structure
> - * @dest_ctx: pointer to memory for the packed structure
> - * @ce_info: List of Rx context elements
> - */
> -int ice_set_ctx(struct ice_hw *hw, u8 *src_ctx, u8 *dest_ctx,
> - const struct ice_ctx_ele *ce_info)
> -{
> - int f;
> -
> - for (f = 0; ce_info[f].width; f++) {
> - /* We have to deal with each element of the FW response
> - * using the correct size so that we are correct regardless
> - * of the endianness of the machine.
> - */
> - if (ce_info[f].width > (ce_info[f].size_of * BITS_PER_BYTE)) {
> - ice_debug(hw, ICE_DBG_QCTX, "Field %d width of %d bits larger than size of %d byte(s) ... skipping write\n",
> - f, ce_info[f].width, ce_info[f].size_of);
> - continue;
> - }
> - switch (ce_info[f].size_of) {
> - case sizeof(u8):
> - ice_pack_ctx_byte(src_ctx, dest_ctx, &ce_info[f]);
> - break;
> - case sizeof(u16):
> - ice_pack_ctx_word(src_ctx, dest_ctx, &ce_info[f]);
> - break;
> - case sizeof(u32):
> - ice_pack_ctx_dword(src_ctx, dest_ctx, &ce_info[f]);
> - break;
> - case sizeof(u64):
> - ice_pack_ctx_qword(src_ctx, dest_ctx, &ce_info[f]);
> - break;
> - default:
> - return -EINVAL;
> - }
> - }
> -
> - return 0;
> -}
> -
Some nice cleanup!
> /**
> * ice_get_lan_q_ctx - get the LAN queue context for the given VSI and TC
> * @hw: pointer to the HW struct
> diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig
> index 20bc40eec487..c4ea8ae65a95 100644
> --- a/drivers/net/ethernet/intel/Kconfig
> +++ b/drivers/net/ethernet/intel/Kconfig
> @@ -292,6 +292,9 @@ config ICE
> select DIMLIB
> select LIBIE
> select NET_DEVLINK
> + select PACKING
> + select PACKING_CHECK_FIELDS_20
> + select PACKING_CHECK_FIELDS_27
> select PLDMFW
> select DPLL
> help
>
> --
> 2.47.0.265.g4ca455297942
>
>
/Daniel