[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250207145710.GX554665@kernel.org>
Date: Fri, 7 Feb 2025 14:57:10 +0000
From: Simon Horman <horms@...nel.org>
To: Michal Swiatkowski <michal.swiatkowski@...ux.intel.com>
Cc: intel-wired-lan@...ts.osuosl.org, netdev@...r.kernel.org,
marcin.szycik@...ux.intel.com, jedrzej.jagielski@...el.com,
przemyslaw.kitszel@...el.com, piotr.kwapulinski@...el.com,
anthony.l.nguyen@...el.com, dawid.osuchowski@...el.com
Subject: Re: [iwl-next v1 3/4] ixgbe: add Tx hang detection unhandled MDD
On Fri, Feb 07, 2025 at 11:43:42AM +0100, Michal Swiatkowski wrote:
> From: Slawomir Mrozowicz <slawomirx.mrozowicz@...el.com>
>
> Add Tx Hang detection due to an unhandled MDD Event.
>
> Previously, a malicious VF could disable the entire port causing
> TX to hang on the E610 card.
> Those events that caused PF to freeze were not detected
> as an MDD event and usually required a Tx Hang watchdog timer
> to catch the suspension, and perform a physical function reset.
>
> Implement flows in the affected PF driver in such a way to check
> the cause of the hang, detect it as an MDD event and log an
> entry of the malicious VF that caused the Hang.
>
> The PF blocks the malicious VF, if it continues to be the source
> of several MDD events.
>
> Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@...el.com>
> Reviewed-by: Marcin Szycik <marcin.szycik@...ux.intel.com>
> Signed-off-by: Slawomir Mrozowicz <slawomirx.mrozowicz@...el.com>
> Co-developed-by: Michal Swiatkowski <michal.swiatkowski@...ux.intel.com>
> Signed-off-by: Michal Swiatkowski <michal.swiatkowski@...ux.intel.com>
...
> diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
> index aa3b498558bc..e07b56625595 100644
> --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
> +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
> @@ -1044,6 +1044,7 @@ struct ixgbe_nvm_version {
> #define IXGBE_GCR_EXT_VT_MODE_16 0x00000001
> #define IXGBE_GCR_EXT_VT_MODE_32 0x00000002
> #define IXGBE_GCR_EXT_VT_MODE_64 0x00000003
> +#define IXGBE_GCR_EXT_VT_MODE_MASK 0x00000003
nit: For consistency I think spaces should be used to indent 0x00000003
> #define IXGBE_GCR_EXT_SRIOV (IXGBE_GCR_EXT_MSIX_EN | \
> IXGBE_GCR_EXT_VT_MODE_64)
>
...
> diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
...
> +static u32 ixgbe_poll_tx_icache(struct ixgbe_hw *hw, u16 queue, u16 idx)
> +{
> + IXGBE_WRITE_REG(hw, IXGBE_TXDESCIC, queue * idx);
> + return IXGBE_READ_REG(hw, IXGBE_TXDESCIC);
> +}
> +
> +/**
> + * ixgbe_check_illegal_queue - search for queue with illegal packet
> + * @adapter: structure containing ring specific data
> + * @queue: queue index
> + *
> + * Check if tx descriptor connected with input queue
> + * contains illegal packet.
> + *
> + * Returns: true if queue contains an illegal packet.
> + */
> +static bool ixgbe_check_illegal_queue(struct ixgbe_adapter *adapter,
> + u16 queue)
> +{
> + u32 hdr_len_reg, mss_len_reg, type_reg;
> + struct ixgbe_hw *hw = &adapter->hw;
> + u32 mss_len, header_len, reg;
> +
> + for (u16 i = 0; i < IXGBE_MAX_TX_DESCRIPTORS; i++) {
> + /* HW will clear bit IXGBE_TXDESCIC_READY when address
> + * is written to address field. HW will set this bit
> + * when iCache read is done, and data is ready at TIC_DWx.
> + * Set descriptor address.
> + */
> + read_poll_timeout(ixgbe_poll_tx_icache, reg,
> + !(reg & IXGBE_TXDESCIC_READY), 0, 0, false,
> + hw, queue, i);
> +
> + /* read tx descriptor access registers */
> + hdr_len_reg = IXGBE_READ_REG(hw, IXGBE_TIC_DW2(IXGBE_VLAN_MACIP_LENS_REG));
> + type_reg = IXGBE_READ_REG(hw, IXGBE_TIC_DW2(IXGBE_TYPE_TUCMD_MLHL));
> + mss_len_reg = IXGBE_READ_REG(hw, IXGBE_TIC_DW2(IXGBE_MSS_L4LEN_IDX));
> +
> + /* check if Advanced Context Descriptor */
> + if (FIELD_GET(IXGBE_ADVTXD_DTYP_MASK, type_reg) !=
> + IXGBE_ADVTXD_DTYP_CTXT)
> + continue;
> +
> + /* check for illegal MSS and Header length */
> + mss_len = FIELD_GET(IXGBE_ADVTXD_MSS_MASK, mss_len_reg);
> + header_len = FIELD_GET(IXGBE_ADVTXD_HEADER_LEN_MASK,
> + hdr_len_reg);
> + if ((mss_len + header_len) > SZ_16K) {
> + e_warn(probe,
> + "mss len + header len too long\n");
nit: The above two lines can be a single line.
> + return true;
> + }
> + }
> +
> + return false;
> +}
> +
> +/**
> + * ixgbe_handle_mdd_event - handle mdd event
> + * @adapter: structure containing ring specific data
> + * @tx_ring: tx descriptor ring to handle
> + *
> + * Reset VF driver if malicious vf detected or
> + * illegal packet in any queue detected.
> + */
> +static void ixgbe_handle_mdd_event(struct ixgbe_adapter *adapter,
> + struct ixgbe_ring *tx_ring)
> +{
> + u16 vf, q;
> +
> + if (adapter->vfinfo && ixgbe_check_mdd_event(adapter)) {
> + /* vf mdd info and malicious vf detected */
> + if (!ixgbe_get_vf_idx(adapter, tx_ring->queue_index, &vf))
> + ixgbe_vf_handle_tx_hang(adapter, vf);
> + } else {
> + /* malicious vf not detected */
> + for (q = 0; q < IXGBE_MAX_TX_QUEUES; q++) {
> + if (ixgbe_check_illegal_queue(adapter, q) &&
> + !ixgbe_get_vf_idx(adapter, q, &vf))
> + /* illegal queue detected */
> + ixgbe_vf_handle_tx_hang(adapter, vf);
It looks like ixgbe_vf_handle_tx_hang() will run for each illegal queue.
Could that be more than once for a given vf? If so, is that desirable?
> + }
> + }
> +}
> +
> /**
> * ixgbe_clean_tx_irq - Reclaim resources after transmit completes
> * @q_vector: structure containing interrupt and ring information
...
Powered by blists - more mailing lists