Message-ID: <IA3PR11MB8986DB21D9701025AB3E5501E5BFA@IA3PR11MB8986.namprd11.prod.outlook.com>
Date: Mon, 29 Dec 2025 15:16:49 +0000
From: "Loktionov, Aleksandr" <aleksandr.loktionov@...el.com>
To: "Aaron, Ma" <aaron.ma@...onical.com>, "Nguyen, Anthony L"
	<anthony.l.nguyen@...el.com>, "Kitszel, Przemyslaw"
	<przemyslaw.kitszel@...el.com>, "andrew+netdev@...n.ch"
	<andrew+netdev@...n.ch>, "davem@...emloft.net" <davem@...emloft.net>,
	"edumazet@...gle.com" <edumazet@...gle.com>, "kuba@...nel.org"
	<kuba@...nel.org>, "pabeni@...hat.com" <pabeni@...hat.com>,
	"intel-wired-lan@...ts.osuosl.org" <intel-wired-lan@...ts.osuosl.org>,
	"netdev@...r.kernel.org" <netdev@...r.kernel.org>,
	"linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>
Subject: RE: [Intel-wired-lan] [PATCH v3 2/2] ice: recap the VSI and QoS info
 after rebuild



> -----Original Message-----
> From: Intel-wired-lan <intel-wired-lan-bounces@...osl.org> On Behalf
> Of Aaron Ma via Intel-wired-lan
> Sent: Thursday, December 25, 2025 7:21 AM
> To: Nguyen, Anthony L <anthony.l.nguyen@...el.com>; Kitszel,
> Przemyslaw <przemyslaw.kitszel@...el.com>; andrew+netdev@...n.ch;
> davem@...emloft.net; edumazet@...gle.com; kuba@...nel.org;
> pabeni@...hat.com; intel-wired-lan@...ts.osuosl.org;
> netdev@...r.kernel.org; linux-kernel@...r.kernel.org
> Subject: [Intel-wired-lan] [PATCH v3 2/2] ice: recap the VSI and QoS
> info after rebuild
> 
> Fix IRDMA hardware initialization timeout (-110) after resume by
> separating VSI-dependent configuration from RDMA resource allocation,
> ensuring the VSI is rebuilt before IRDMA accesses it.
> 
> After resume from suspend, IRDMA hardware initialization fails:
>   ice: IRDMA hardware initialization FAILED init_state=4 status=-110
> 
> Separate RDMA initialization into two phases:
> 1. ice_init_rdma() - Allocate resources only (no VSI/QoS access, no plug)
> 2. ice_rdma_finalize_setup() - Assign VSI/QoS info and plug device
> 
> This allows:
> - ice_init_rdma() to stay in ice_resume() (mirrors ice_deinit_rdma()
>   in ice_suspend())
> - VSI assignment deferred until after ice_vsi_rebuild() completes
> - QoS info updated after ice_dcb_rebuild() completes
> - Device plugged only when control queues, VSI, and DCB are all ready
> 
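If I read the series right, the resulting ordering on the S3 path is then
(function names as in the patch below):

    ice_suspend()
        ice_unplug_aux_dev()
        ice_deinit_rdma()
    ice_resume()
        ice_init_rdma()             /* allocate resources only */
    ice_rebuild()                   /* PF reset after resume */
        ice_vsi_rebuild()
        ice_dcb_rebuild()
        ice_rdma_finalize_setup()   /* assign VSI/QoS info, plug aux dev */
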
> Fixes: bc69ad74867db ("ice: avoid IRQ collision to fix init failure on
> ACPI S3 resume")
> Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@...el.com>
> Signed-off-by: Aaron Ma <aaron.ma@...onical.com>
> ---
> V1 -> V2: no changes.
> V2 -> V3:
> - mirrors init_rdma in resume as Tony Nguyen suggested to fix the
> memleak and move ice_plug_aux_dev/ice_unplug_aux_dev out of
> init/deinit rdma.
> - ensure the correct VSI/QoS info is loaded after rebuild.
> 
>  drivers/net/ethernet/intel/ice/ice.h      |  1 +
>  drivers/net/ethernet/intel/ice/ice_idc.c  | 41 +++++++++++++++++------
>  drivers/net/ethernet/intel/ice/ice_main.c |  7 +++-
>  3 files changed, 38 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
> index 147aaee192a79..6463c1fea7871 100644
> --- a/drivers/net/ethernet/intel/ice/ice.h
> +++ b/drivers/net/ethernet/intel/ice/ice.h
> @@ -989,6 +989,7 @@ int ice_schedule_reset(struct ice_pf *pf, enum ice_reset_req reset);
>  void ice_print_link_msg(struct ice_vsi *vsi, bool isup);
>  int ice_plug_aux_dev(struct ice_pf *pf);
>  void ice_unplug_aux_dev(struct ice_pf *pf);
> +void ice_rdma_finalize_setup(struct ice_pf *pf);
>  int ice_init_rdma(struct ice_pf *pf);
>  void ice_deinit_rdma(struct ice_pf *pf);
>  bool ice_is_wol_supported(struct ice_hw *hw);
> diff --git a/drivers/net/ethernet/intel/ice/ice_idc.c b/drivers/net/ethernet/intel/ice/ice_idc.c
> index 420d45c2558b6..b6079a6cb7736 100644
> --- a/drivers/net/ethernet/intel/ice/ice_idc.c
> +++ b/drivers/net/ethernet/intel/ice/ice_idc.c
> @@ -360,6 +360,35 @@ void ice_unplug_aux_dev(struct ice_pf *pf)
>  	auxiliary_device_uninit(adev);
>  }
> 
> +/**
> + * ice_rdma_finalize_setup - Complete RDMA setup after VSI is ready
> + * @pf: ptr to ice_pf
> + *
> + * Sets VSI-dependent information and plugs aux device.
> + * Must be called after ice_init_rdma(), ice_vsi_rebuild(), and
> + * ice_dcb_rebuild() complete.
> + */
> +void ice_rdma_finalize_setup(struct ice_pf *pf)
> +{
> +	struct iidc_rdma_priv_dev_info *privd;
> +
> +	if (!ice_is_rdma_ena(pf) || !pf->cdev_info)
> +		return;
> +
> +	privd = pf->cdev_info->iidc_priv;
> +	if (!privd || !pf->vsi[0] || !pf->vsi[0]->netdev)
Direct array indexing here assumes the VSI array is allocated and that VSI 0 exists, without checking either.
What do you think about:

    if (!privd || !pf->vsi || !pf->vsi[0] || !pf->vsi[0]->netdev)
        return;

?
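
For completeness, with that extra check the helper would look roughly like this
(untested; same code as your hunk above, plus the !pf->vsi test):

    void ice_rdma_finalize_setup(struct ice_pf *pf)
    {
        struct iidc_rdma_priv_dev_info *privd;

        if (!ice_is_rdma_ena(pf) || !pf->cdev_info)
            return;

        privd = pf->cdev_info->iidc_priv;
        /* also bail out if the VSI array itself was never allocated */
        if (!privd || !pf->vsi || !pf->vsi[0] || !pf->vsi[0]->netdev)
            return;

        /* Assign VSI info now that VSI is valid */
        privd->netdev = pf->vsi[0]->netdev;
        privd->vport_id = pf->vsi[0]->vsi_num;

        /* Update QoS info after DCB has been rebuilt */
        ice_setup_dcb_qos_info(pf, &privd->qos_info);

        ice_plug_aux_dev(pf);
    }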

> +		return;
> +
> +	/* Assign VSI info now that VSI is valid */
> +	privd->netdev = pf->vsi[0]->netdev;
> +	privd->vport_id = pf->vsi[0]->vsi_num;
> +
> +	/* Update QoS info after DCB has been rebuilt */
> +	ice_setup_dcb_qos_info(pf, &privd->qos_info);
> +
> +	ice_plug_aux_dev(pf);
> +}
> +
>  /**
>   * ice_init_rdma - initializes PF for RDMA use
>   * @pf: ptr to ice_pf
> @@ -398,23 +427,16 @@ int ice_init_rdma(struct ice_pf *pf)
>  	}
> 
>  	cdev->iidc_priv = privd;
> -	privd->netdev = pf->vsi[0]->netdev;
> 
>  	privd->hw_addr = (u8 __iomem *)pf->hw.hw_addr;
>  	cdev->pdev = pf->pdev;
> -	privd->vport_id = pf->vsi[0]->vsi_num;
> 
>  	pf->cdev_info->rdma_protocol |= IIDC_RDMA_PROTOCOL_ROCEV2;
> -	ice_setup_dcb_qos_info(pf, &privd->qos_info);
> -	ret = ice_plug_aux_dev(pf);
> -	if (ret)
> -		goto err_plug_aux_dev;
> +
>  	return 0;
> 
> -err_plug_aux_dev:
> -	pf->cdev_info->adev = NULL;
> -	xa_erase(&ice_aux_id, pf->aux_idx);
>  err_alloc_xa:
> +	xa_erase(&ice_aux_id, pf->aux_idx);
>  	kfree(privd);
>  err_privd_alloc:
>  	kfree(cdev);
> @@ -432,7 +454,6 @@ void ice_deinit_rdma(struct ice_pf *pf)
>  	if (!ice_is_rdma_ena(pf))
>  		return;
> 
> -	ice_unplug_aux_dev(pf);
>  	xa_erase(&ice_aux_id, pf->aux_idx);
>  	kfree(pf->cdev_info->iidc_priv);
>  	kfree(pf->cdev_info);
> diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
> index 4bb68e7a00f5f..1851e9932cefe 100644
> --- a/drivers/net/ethernet/intel/ice/ice_main.c
> +++ b/drivers/net/ethernet/intel/ice/ice_main.c
> @@ -5135,6 +5135,9 @@ int ice_load(struct ice_pf *pf)
>  	if (err)
>  		goto err_init_rdma;
> 
> +	/* Finalize RDMA: VSI already created, assign info and plug device */
> +	ice_rdma_finalize_setup(pf);
> +
>  	ice_service_task_restart(pf);
> 
>  	clear_bit(ICE_DOWN, pf->state);
> @@ -5166,6 +5169,7 @@ void ice_unload(struct ice_pf *pf)
> 
>  	devl_assert_locked(priv_to_devlink(pf));
> 
> +	ice_unplug_aux_dev(pf);
>  	ice_deinit_rdma(pf);
>  	ice_deinit_features(pf);
>  	ice_tc_indir_block_unregister(vsi);
> @@ -5594,6 +5598,7 @@ static int ice_suspend(struct device *dev)
>  	 */
>  	disabled = ice_service_task_stop(pf);
> 
> +	ice_unplug_aux_dev(pf);
>  	ice_deinit_rdma(pf);
> 
>  	/* Already suspended?, then there is nothing to do */
> @@ -7803,7 +7808,7 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type)
> 
>  	ice_health_clear(pf);
> 
> -	ice_plug_aux_dev(pf);
> +	ice_rdma_finalize_setup(pf);
>  	if (ice_is_feature_supported(pf, ICE_F_SRIOV_LAG))
>  		ice_lag_rebuild(pf);
> 
> --
> 2.43.0

