lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <ZtdqfLfHYvEKPE+r@boxer>
Date: Tue, 3 Sep 2024 21:58:52 +0200
From: Maciej Fijalkowski <maciej.fijalkowski@...el.com>
To: Aleksandr Loktionov <aleksandr.loktionov@...el.com>
CC: <intel-wired-lan@...ts.osuosl.org>, <anthony.l.nguyen@...el.com>,
	<netdev@...r.kernel.org>, Jan Sokolowski <jan.sokolowski@...el.com>, "Padraig
 J Connolly" <padraig.j.connolly@...el.com>
Subject: Re: [PATCH iwl-next v3] i40e: add ability to reset vf for tx and rx
 mdd events

On Fri, Aug 30, 2024 at 09:28:07PM +0200, Aleksandr Loktionov wrote:
> In cases when vf sends malformed packets that are classified as
> malicious, sometimes it causes tx queue to freeze. This frozen queue can be
> stuck for several minutes being unusable. When mdd event occurs, there is a
> posibility to perform a graceful vf reset to quickly bring vf back to
> operational state.
> 
> Currently vf iqueues are being disabled if mdd event occurs.
> Add the ability to reset vf if tx or rx mdd occurs.
> Add mdd events logging throttling /* avoid dmesg polution */.
> Unify tx rx mdd messages formats.
> 
> Co-developed-by: Jan Sokolowski <jan.sokolowski@...el.com>
> Signed-off-by: Jan Sokolowski <jan.sokolowski@...el.com>
> Co-developed-by: Padraig J Connolly <padraig.j.connolly@...el.com>
> Signed-off-by:  Padraig J Connolly <padraig.j.connolly@...el.com>
> Signed-off-by: Aleksandr Loktionov <aleksandr.loktionov@...el.com>
> ---
> v2->v3 fix compilation issue
> v1->v2 fix compilation issue
> ---
>  drivers/net/ethernet/intel/i40e/i40e.h        |   4 +-
>  .../net/ethernet/intel/i40e/i40e_debugfs.c    |   2 +-
>  .../net/ethernet/intel/i40e/i40e_ethtool.c    |   2 +
>  drivers/net/ethernet/intel/i40e/i40e_main.c   | 116 ++++++++++++++++--
>  .../ethernet/intel/i40e/i40e_virtchnl_pf.c    |   2 +-
>  .../ethernet/intel/i40e/i40e_virtchnl_pf.h    |  11 +-
>  6 files changed, 122 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
> index d546567..6d6683c 100644
> --- a/drivers/net/ethernet/intel/i40e/i40e.h
> +++ b/drivers/net/ethernet/intel/i40e/i40e.h
> @@ -87,6 +87,7 @@ enum i40e_state {
>  	__I40E_SERVICE_SCHED,
>  	__I40E_ADMINQ_EVENT_PENDING,
>  	__I40E_MDD_EVENT_PENDING,
> +	__I40E_MDD_VF_PRINT_PENDING,
>  	__I40E_VFLR_EVENT_PENDING,
>  	__I40E_RESET_RECOVERY_PENDING,
>  	__I40E_TIMEOUT_RECOVERY_PENDING,
> @@ -190,6 +191,7 @@ enum i40e_pf_flags {
>  	 */
>  	I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENA,
>  	I40E_FLAG_VF_VLAN_PRUNING_ENA,
> +	I40E_FLAG_MDD_AUTO_RESET_VF,
>  	I40E_PF_FLAGS_NBITS,		/* must be last */
>  };
>  
> @@ -571,7 +573,7 @@ struct i40e_pf {
>  	int num_alloc_vfs;	/* actual number of VFs allocated */
>  	u32 vf_aq_requests;
>  	u32 arq_overflows;	/* Not fatal, possibly indicative of problems */
> -
> +	unsigned long last_printed_mdd_jiffies; /* MDD message rate limit */
>  	/* DCBx/DCBNL capability for PF that indicates
>  	 * whether DCBx is managed by firmware or host
>  	 * based agent (LLDPAD). Also, indicates what
> diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
> index abf624d..6a697bf 100644
> --- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
> +++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
> @@ -721,7 +721,7 @@ static void i40e_dbg_dump_vf(struct i40e_pf *pf, int vf_id)
>  		dev_info(&pf->pdev->dev, "vf %2d: VSI id=%d, seid=%d, qps=%d\n",
>  			 vf_id, vf->lan_vsi_id, vsi->seid, vf->num_queue_pairs);
>  		dev_info(&pf->pdev->dev, "       num MDD=%lld\n",
> -			 vf->num_mdd_events);
> +			 vf->mdd_tx_events.count + vf->mdd_rx_events.count);
>  	} else {
>  		dev_info(&pf->pdev->dev, "invalid VF id %d\n", vf_id);
>  	}
> diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
> index 1d0d2e5..d146575 100644
> --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
> +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
> @@ -459,6 +459,8 @@ static const struct i40e_priv_flags i40e_gstrings_priv_flags[] = {
>  	I40E_PRIV_FLAG("base-r-fec", I40E_FLAG_BASE_R_FEC, 0),
>  	I40E_PRIV_FLAG("vf-vlan-pruning",
>  		       I40E_FLAG_VF_VLAN_PRUNING_ENA, 0),
> +	I40E_PRIV_FLAG("mdd-auto-reset-vf",
> +		       I40E_FLAG_MDD_AUTO_RESET_VF, 0),

you don't tell us that this is implemented via priv-flag in the commit
message, would be good to include info about it.

>  };
>  
>  #define I40E_PRIV_FLAGS_STR_LEN ARRAY_SIZE(i40e_gstrings_priv_flags)
> diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
> index cbcfada..28df3d5 100644
> --- a/drivers/net/ethernet/intel/i40e/i40e_main.c
> +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
> @@ -11189,22 +11189,102 @@ static void i40e_handle_reset_warning(struct i40e_pf *pf, bool lock_acquired)
>  	i40e_reset_and_rebuild(pf, false, lock_acquired);
>  }
>  
> +/**
> + * i40e_print_vf_rx_mdd_event - print VF Rx malicious driver detect event
> + * @pf: board private structure
> + * @vf: pointer to the VF structure
> + */
> +static void i40e_print_vf_rx_mdd_event(struct i40e_pf *pf, struct i40e_vf *vf)
> +{
> +	dev_err(&pf->pdev->dev, "%lld Rx Malicious Driver Detection events detected on PF %d VF %d MAC %pm. mdd-auto-reset-vfs=%s\n",
> +		vf->mdd_rx_events.count,
> +		pf->hw.pf_id,
> +		vf->vf_id,
> +		vf->default_lan_addr.addr,
> +		test_bit(I40E_FLAG_MDD_AUTO_RESET_VF, pf->flags) ? "on" : "off");
> +}
> +
> +/**
> + * i40e_print_vf_tx_mdd_event - print VF Tx malicious driver detect event
> + * @pf: board private structure
> + * @vf: pointer to the VF structure
> + */
> +static void i40e_print_vf_tx_mdd_event(struct i40e_pf *pf, struct i40e_vf *vf)
> +{
> +	dev_err(&pf->pdev->dev, "%lld Tx Malicious Driver Detection events detected on PF %d VF %d MAC %pm. mdd-auto-reset-vfs=%s\n",
> +		vf->mdd_tx_events.count,
> +		pf->hw.pf_id,
> +		vf->vf_id,
> +		vf->default_lan_addr.addr,
> +		test_bit(I40E_FLAG_MDD_AUTO_RESET_VF, pf->flags) ? "on" : "off");
> +}

Unnecesary code duplication, two functions with printing the very same
statement with a single different letter.

static void i40e_print_vf_mdd_event(struct i40e_pf *pf, struct i40e_vf *vf, const char *rxtx)
{
	dev_err(&pf->pdev->dev, "%lld %s Malicious Driver Detection events detected on PF %d VF %d MAC %pm. mdd-auto-reset-vfs=%s\n",
		vf->mdd_tx_events.count,
		rxtx,
		pf->hw.pf_id,
		vf->vf_id,
		vf->default_lan_addr.addr,
		test_bit(I40E_FLAG_MDD_AUTO_RESET_VF, pf->flags) ? "on" : "off");
}

	i40e_print_vf_mdd_event(pf, vf, "Rx");
	i40e_print_vf_mdd_event(pf, vf, "Tx");

> +
> +/**
> + * i40e_print_vfs_mdd_events - print VFs malicious driver detect event
> + * @pf: pointer to the PF structure
> + *
> + * Called from i40e_handle_mdd_event to rate limit and print VFs MDD events.
> + */
> +static void i40e_print_vfs_mdd_events(struct i40e_pf *pf)
> +{
> +	unsigned int i;
> +
> +	/* check that there are pending MDD events to print */
> +	if (!test_and_clear_bit(__I40E_MDD_VF_PRINT_PENDING, pf->state))
> +		return;
> +
> +	/* VF MDD event logs are rate limited to one second intervals */
> +	if (time_is_after_jiffies(pf->last_printed_mdd_jiffies + HZ * 1))
> +		return;
> +
> +	pf->last_printed_mdd_jiffies = jiffies;

why homegrown rate limiting?

> +
> +	for (i = 0; i < pf->num_alloc_vfs; i++) {
> +		struct i40e_vf *vf = &pf->vf[i];
> +		bool is_printed = false;
> +
> +		/* only print Rx MDD event message if there are new events */
> +		if (vf->mdd_rx_events.count != vf->mdd_rx_events.last_printed) {
> +			vf->mdd_rx_events.last_printed = vf->mdd_rx_events.count;
> +			i40e_print_vf_rx_mdd_event(pf, vf);
> +			is_printed = true;
> +		}
> +
> +		/* only print Tx MDD event message if there are new events */
> +		if (vf->mdd_tx_events.count != vf->mdd_tx_events.last_printed) {
> +			vf->mdd_tx_events.last_printed = vf->mdd_tx_events.count;
> +			i40e_print_vf_tx_mdd_event(pf, vf);
> +			is_printed = true;
> +		}
> +
> +		if (is_printed && !test_bit(I40E_FLAG_MDD_AUTO_RESET_VF, pf->flags))
> +			dev_info(&pf->pdev->dev,
> +				 "Use PF Control I/F to re-enable the VF #%d\n",
> +				 i);
> +	}
> +}
> +
>  /**
>   * i40e_handle_mdd_event
>   * @pf: pointer to the PF structure
>   *
>   * Called from the MDD irq handler to identify possibly malicious vfs
>   **/
>  static void i40e_handle_mdd_event(struct i40e_pf *pf)
>  {
>  	struct i40e_hw *hw = &pf->hw;
>  	bool mdd_detected = false;
>  	struct i40e_vf *vf;
>  	u32 reg;
>  	int i;
>  
> -	if (!test_bit(__I40E_MDD_EVENT_PENDING, pf->state))
> +	if (!test_and_clear_bit(__I40E_MDD_EVENT_PENDING, pf->state)) {
> +		/* Since the VF MDD event logging is rate limited, check if
> +		 * there are pending MDD events.
> +		 */
> +		i40e_print_vfs_mdd_events(pf);
>  		return;
> +	}
>  
>  	/* find what triggered the MDD event */
>  	reg = rd32(hw, I40E_GL_MDET_TX);
> @@ -11248,36 +11328,50 @@ static void i40e_handle_mdd_event(struct i40e_pf *pf)
>  
>  	/* see if one of the VFs needs its hand slapped */
>  	for (i = 0; i < pf->num_alloc_vfs && mdd_detected; i++) {
> +		bool is_mdd_on_tx = false;
> +		bool is_mdd_on_rx = false;
> +
>  		vf = &(pf->vf[i]);
>  		reg = rd32(hw, I40E_VP_MDET_TX(i));
>  		if (reg & I40E_VP_MDET_TX_VALID_MASK) {
> +			set_bit(__I40E_MDD_VF_PRINT_PENDING, pf->state);
>  			wr32(hw, I40E_VP_MDET_TX(i), 0xFFFF);
> -			vf->num_mdd_events++;
> -			dev_info(&pf->pdev->dev, "TX driver issue detected on VF %d\n",
> -				 i);
> -			dev_info(&pf->pdev->dev,
> -				 "Use PF Control I/F to re-enable the VF\n");
> +			vf->mdd_tx_events.count++;
>  			set_bit(I40E_VF_STATE_DISABLED, &vf->vf_states);
> +			is_mdd_on_tx = true;
>  		}
>  
>  		reg = rd32(hw, I40E_VP_MDET_RX(i));
>  		if (reg & I40E_VP_MDET_RX_VALID_MASK) {
> +			set_bit(__I40E_MDD_VF_PRINT_PENDING, pf->state);
>  			wr32(hw, I40E_VP_MDET_RX(i), 0xFFFF);
> -			vf->num_mdd_events++;
> -			dev_info(&pf->pdev->dev, "RX driver issue detected on VF %d\n",
> -				 i);
> -			dev_info(&pf->pdev->dev,
> -				 "Use PF Control I/F to re-enable the VF\n");
> +			vf->mdd_rx_events.count++;
>  			set_bit(I40E_VF_STATE_DISABLED, &vf->vf_states);
> +			is_mdd_on_rx = true;
> +		}
> +
> +		if ((is_mdd_on_tx || is_mdd_on_rx) &&
> +		    test_bit(I40E_FLAG_MDD_AUTO_RESET_VF, pf->flags)) {
> +			/* VF MDD event counters will be cleared by
> +			 * reset, so print the event prior to reset.
> +			 */
> +			if (is_mdd_on_rx)
> +				i40e_print_vf_rx_mdd_event(pf, vf);
> +			if (is_mdd_on_tx)
> +				i40e_print_vf_tx_mdd_event(pf, vf);
> +
> +			i40e_vc_reset_vf(vf, true);
>  		}
>  	}
>  
>  	/* re-enable mdd interrupt cause */
>  	clear_bit(__I40E_MDD_EVENT_PENDING, pf->state);
>  	reg = rd32(hw, I40E_PFINT_ICR0_ENA);
>  	reg |=  I40E_PFINT_ICR0_ENA_MAL_DETECT_MASK;
>  	wr32(hw, I40E_PFINT_ICR0_ENA, reg);
>  	i40e_flush(hw);
> +
> +	i40e_print_vfs_mdd_events(pf);
>  }
>  
>  /**
> diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
> index 662622f..5b4618e 100644
> --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
> +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
> @@ -216,7 +216,7 @@ void i40e_vc_notify_vf_reset(struct i40e_vf *vf)
>   * @notify_vf: notify vf about reset or not
>   * Reset VF handler.
>   **/
> -static void i40e_vc_reset_vf(struct i40e_vf *vf, bool notify_vf)
> +void i40e_vc_reset_vf(struct i40e_vf *vf, bool notify_vf)
>  {
>  	struct i40e_pf *pf = vf->pf;
>  	int i;
> diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
> index 66f95e2..5cf74f1 100644
> --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
> +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
> @@ -64,6 +64,12 @@ struct i40evf_channel {
>  	u64 max_tx_rate; /* bandwidth rate allocation for VSIs */
>  };
>  
> +struct i40e_mdd_vf_events {
> +	u64 count;      /* total count of Rx|Tx events */
> +	/* count number of the last printed event */
> +	u64 last_printed;
> +};
> +
>  /* VF information structure */
>  struct i40e_vf {
>  	struct i40e_pf *pf;
> @@ -92,7 +98,9 @@ struct i40e_vf {
>  
>  	u8 num_queue_pairs;	/* num of qps assigned to VF vsis */
>  	u8 num_req_queues;	/* num of requested qps */
> -	u64 num_mdd_events;	/* num of mdd events detected */
> +	/* num of mdd tx and rx events detected */
> +	struct i40e_mdd_vf_events mdd_rx_events;
> +	struct i40e_mdd_vf_events mdd_tx_events;
>  
>  	unsigned long vf_caps;	/* vf's adv. capabilities */
>  	unsigned long vf_states;	/* vf's runtime states */
> @@ -120,6 +128,7 @@ int i40e_alloc_vfs(struct i40e_pf *pf, u16 num_alloc_vfs);
>  int i40e_vc_process_vf_msg(struct i40e_pf *pf, s16 vf_id, u32 v_opcode,
>  			   u32 v_retval, u8 *msg, u16 msglen);
>  int i40e_vc_process_vflr_event(struct i40e_pf *pf);
> +void i40e_vc_reset_vf(struct i40e_vf *vf, bool notify_vf);
>  bool i40e_reset_vf(struct i40e_vf *vf, bool flr);
>  bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr);
>  void i40e_vc_notify_vf_reset(struct i40e_vf *vf);
> -- 
> 2.25.1
> 
> 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ