[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <42943746-771e-432e-b5e0-98267987ed65@suse.de>
Date: Tue, 8 Jul 2025 10:54:19 +0200
From: Hannes Reinecke <hare@...e.de>
To: Karan Tilak Kumar <kartilak@...co.com>, sebaddel@...co.com
Cc: arulponn@...co.com, djhawar@...co.com, gcboffa@...co.com,
mkai2@...co.com, satishkh@...co.com, aeasi@...co.com, jejb@...ux.ibm.com,
martin.petersen@...cle.com, linux-scsi@...r.kernel.org,
linux-kernel@...r.kernel.org, jmeneghi@...hat.com, revers@...hat.com,
dan.carpenter@...aro.org, stable@...r.kernel.org
Subject: Re: [PATCH v5 1/4] scsi: fnic: Fix crash in fnic_wq_cmpl_handler when
FDMI times out
On 6/16/25 18:26, Karan Tilak Kumar wrote:
> When both the RHBA and RPA FDMI requests time out, fnic reuses a frame
> to send ABTS for each of them. On send completion, this causes an
> attempt to free the same frame twice that leads to a crash.
>
> Fix crash by allocating separate frames for RHBA and RPA,
> and modify ABTS logic accordingly.
>
> Tested by checking MDS for FDMI information.
> Tested by using instrumented driver to:
> Drop PLOGI response
> Drop RHBA response
> Drop RPA response
> Drop RHBA and RPA response
> Drop PLOGI response + ABTS response
> Drop RHBA response + ABTS response
> Drop RPA response + ABTS response
> Drop RHBA and RPA response + ABTS response for both of them
>
> Fixes: 09c1e6ab4ab2 ("scsi: fnic: Add and integrate support for FDMI")
> Reviewed-by: Sesidhar Baddela <sebaddel@...co.com>
> Reviewed-by: Arulprabhu Ponnusamy <arulponn@...co.com>
> Reviewed-by: Gian Carlo Boffa <gcboffa@...co.com>
> Tested-by: Arun Easi <aeasi@...co.com>
> Co-developed-by: Arun Easi <aeasi@...co.com>
> Signed-off-by: Arun Easi <aeasi@...co.com>
> Tested-by: Karan Tilak Kumar <kartilak@...co.com>
> Cc: <stable@...r.kernel.org>
> Signed-off-by: Karan Tilak Kumar <kartilak@...co.com>
> ---
> Changes between v4 and v5:
> - Incorporate review comments from John:
> - Refactor patches
>
> Changes between v3 and v4:
> - Incorporate review comments from Dan:
> - Remove comments from Cc tag
>
> Changes between v2 and v3:
> - Incorporate review comments from Dan:
> - Add Cc to stable
>
> Changes between v1 and v2:
> - Incorporate review comments from Dan:
> - Add Fixes tag
> ---
> drivers/scsi/fnic/fdls_disc.c | 113 +++++++++++++++++++++++++---------
> drivers/scsi/fnic/fnic.h | 2 +-
> drivers/scsi/fnic/fnic_fdls.h | 1 +
> 3 files changed, 87 insertions(+), 29 deletions(-)
>
> diff --git a/drivers/scsi/fnic/fdls_disc.c b/drivers/scsi/fnic/fdls_disc.c
> index c2b6f4eb338e..0ee1b74967b9 100644
> --- a/drivers/scsi/fnic/fdls_disc.c
> +++ b/drivers/scsi/fnic/fdls_disc.c
> @@ -763,47 +763,69 @@ static void fdls_send_fabric_abts(struct fnic_iport_s *iport)
> iport->fabric.timer_pending = 1;
> }
>
> -static void fdls_send_fdmi_abts(struct fnic_iport_s *iport)
> +static uint8_t *fdls_alloc_init_fdmi_abts_frame(struct fnic_iport_s *iport,
> + uint16_t oxid)
> {
> - uint8_t *frame;
> + struct fc_frame_header *pfdmi_abts;
> uint8_t d_id[3];
> + uint8_t *frame;
> struct fnic *fnic = iport->fnic;
> - struct fc_frame_header *pfabric_abts;
> - unsigned long fdmi_tov;
> - uint16_t oxid;
> - uint16_t frame_size = FNIC_ETH_FCOE_HDRS_OFFSET +
> - sizeof(struct fc_frame_header);
>
> frame = fdls_alloc_frame(iport);
> if (frame == NULL) {
> FNIC_FCS_DBG(KERN_ERR, fnic->host, fnic->fnic_num,
> "Failed to allocate frame to send FDMI ABTS");
> - return;
> + return NULL;
> }
>
> - pfabric_abts = (struct fc_frame_header *) (frame + FNIC_ETH_FCOE_HDRS_OFFSET);
> + pfdmi_abts = (struct fc_frame_header *) (frame + FNIC_ETH_FCOE_HDRS_OFFSET);
> fdls_init_fabric_abts_frame(frame, iport);
>
> hton24(d_id, FC_FID_MGMT_SERV);
> - FNIC_STD_SET_D_ID(*pfabric_abts, d_id);
> + FNIC_STD_SET_D_ID(*pfdmi_abts, d_id);
> + FNIC_STD_SET_OX_ID(*pfdmi_abts, oxid);
> +
> + return frame;
> +}
> +
> +static void fdls_send_fdmi_abts(struct fnic_iport_s *iport)
> +{
> + uint8_t *frame;
> + unsigned long fdmi_tov;
> + uint16_t frame_size = FNIC_ETH_FCOE_HDRS_OFFSET +
> + sizeof(struct fc_frame_header);
>
> if (iport->fabric.fdmi_pending & FDLS_FDMI_PLOGI_PENDING) {
> - oxid = iport->active_oxid_fdmi_plogi;
> - FNIC_STD_SET_OX_ID(*pfabric_abts, oxid);
> + frame = fdls_alloc_init_fdmi_abts_frame(iport,
> + iport->active_oxid_fdmi_plogi);
> + if (frame == NULL)
> + return;
> +
> fnic_send_fcoe_frame(iport, frame, frame_size);
> } else {
> if (iport->fabric.fdmi_pending & FDLS_FDMI_REG_HBA_PENDING) {
> - oxid = iport->active_oxid_fdmi_rhba;
> - FNIC_STD_SET_OX_ID(*pfabric_abts, oxid);
> + frame = fdls_alloc_init_fdmi_abts_frame(iport,
> + iport->active_oxid_fdmi_rhba);
> + if (frame == NULL)
> + return;
> +
> fnic_send_fcoe_frame(iport, frame, frame_size);
> }
> if (iport->fabric.fdmi_pending & FDLS_FDMI_RPA_PENDING) {
> - oxid = iport->active_oxid_fdmi_rpa;
> - FNIC_STD_SET_OX_ID(*pfabric_abts, oxid);
> + frame = fdls_alloc_init_fdmi_abts_frame(iport,
> + iport->active_oxid_fdmi_rpa);
> + if (frame == NULL) {
> + if (iport->fabric.fdmi_pending & FDLS_FDMI_REG_HBA_PENDING)
> + goto arm_timer;
> + else
> + return;
> + }
> +
> fnic_send_fcoe_frame(iport, frame, frame_size);
> }
> }
>
> +arm_timer:
> fdmi_tov = jiffies + msecs_to_jiffies(2 * iport->e_d_tov);
> mod_timer(&iport->fabric.fdmi_timer, round_jiffies(fdmi_tov));
> iport->fabric.fdmi_pending |= FDLS_FDMI_ABORT_PENDING;
> @@ -2244,6 +2266,21 @@ void fdls_fabric_timer_callback(struct timer_list *t)
> spin_unlock_irqrestore(&fnic->fnic_lock, flags);
> }
>
> +void fdls_fdmi_retry_plogi(struct fnic_iport_s *iport)
> +{
> + struct fnic *fnic = iport->fnic;
> +
> + iport->fabric.fdmi_pending = 0;
> + /* If max retries not exhausted, start over from fdmi plogi */
> + if (iport->fabric.fdmi_retry < FDLS_FDMI_MAX_RETRY) {
> + iport->fabric.fdmi_retry++;
> + FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num,
> + "Retry FDMI PLOGI. FDMI retry: %d",
> + iport->fabric.fdmi_retry);
> + fdls_send_fdmi_plogi(iport);
> + }
> +}
> +
> void fdls_fdmi_timer_callback(struct timer_list *t)
> {
> struct fnic_fdls_fabric_s *fabric = from_timer(fabric, t, fdmi_timer);
> @@ -2289,14 +2326,7 @@ void fdls_fdmi_timer_callback(struct timer_list *t)
> FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num,
> "fdmi timer callback : 0x%x\n", iport->fabric.fdmi_pending);
>
> - iport->fabric.fdmi_pending = 0;
> - /* If max retries not exhaused, start over from fdmi plogi */
> - if (iport->fabric.fdmi_retry < FDLS_FDMI_MAX_RETRY) {
> - iport->fabric.fdmi_retry++;
> - FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num,
> - "retry fdmi timer %d", iport->fabric.fdmi_retry);
> - fdls_send_fdmi_plogi(iport);
> - }
> + fdls_fdmi_retry_plogi(iport);
> FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num,
> "fdmi timer callback : 0x%x\n", iport->fabric.fdmi_pending);
> spin_unlock_irqrestore(&fnic->fnic_lock, flags);
> @@ -3714,11 +3744,32 @@ static void fdls_process_fdmi_abts_rsp(struct fnic_iport_s *iport,
> switch (FNIC_FRAME_TYPE(oxid)) {
> case FNIC_FRAME_TYPE_FDMI_PLOGI:
> fdls_free_oxid(iport, oxid, &iport->active_oxid_fdmi_plogi);
> +
> + iport->fabric.fdmi_pending &= ~FDLS_FDMI_PLOGI_PENDING;
> + iport->fabric.fdmi_pending &= ~FDLS_FDMI_ABORT_PENDING;
> break;
> case FNIC_FRAME_TYPE_FDMI_RHBA:
> + iport->fabric.fdmi_pending &= ~FDLS_FDMI_REG_HBA_PENDING;
> +
> + /* If RPA is still pending, don't turn off ABORT PENDING.
> + * We count on the timer to detect the ABTS timeout and take
> + * corrective action.
> + */
> + if (!(iport->fabric.fdmi_pending & FDLS_FDMI_RPA_PENDING))
> + iport->fabric.fdmi_pending &= ~FDLS_FDMI_ABORT_PENDING;
> +
> fdls_free_oxid(iport, oxid, &iport->active_oxid_fdmi_rhba);
> break;
> case FNIC_FRAME_TYPE_FDMI_RPA:
> + iport->fabric.fdmi_pending &= ~FDLS_FDMI_RPA_PENDING;
> +
> + /* If RHBA is still pending, don't turn off ABORT PENDING.
> + * We count on the timer to detect the ABTS timeout and take
> + * corrective action.
> + */
> + if (!(iport->fabric.fdmi_pending & FDLS_FDMI_REG_HBA_PENDING))
> + iport->fabric.fdmi_pending &= ~FDLS_FDMI_ABORT_PENDING;
> +
> fdls_free_oxid(iport, oxid, &iport->active_oxid_fdmi_rpa);
> break;
> default:
> @@ -3728,10 +3779,16 @@ static void fdls_process_fdmi_abts_rsp(struct fnic_iport_s *iport,
> break;
> }
>
> - timer_delete_sync(&iport->fabric.fdmi_timer);
> - iport->fabric.fdmi_pending &= ~FDLS_FDMI_ABORT_PENDING;
> -
> - fdls_send_fdmi_plogi(iport);
> + /*
> + * Only if ABORT PENDING is off, delete the timer, and if no other
> + * operations are pending, retry FDMI.
> + * Otherwise, let the timer pop and take the appropriate action.
> + */
> + if (!(iport->fabric.fdmi_pending & FDLS_FDMI_ABORT_PENDING)) {
> + timer_delete_sync(&iport->fabric.fdmi_timer);
> + if (!iport->fabric.fdmi_pending)
> + fdls_fdmi_retry_plogi(iport);
> + }
> }
>
> static void
> diff --git a/drivers/scsi/fnic/fnic.h b/drivers/scsi/fnic/fnic.h
> index 6c5f6046b1f5..86e293ce530d 100644
> --- a/drivers/scsi/fnic/fnic.h
> +++ b/drivers/scsi/fnic/fnic.h
> @@ -30,7 +30,7 @@
>
> #define DRV_NAME "fnic"
> #define DRV_DESCRIPTION "Cisco FCoE HBA Driver"
> -#define DRV_VERSION "1.8.0.0"
> +#define DRV_VERSION "1.8.0.1"
> #define PFX DRV_NAME ": "
> #define DFX DRV_NAME "%d: "
>
> diff --git a/drivers/scsi/fnic/fnic_fdls.h b/drivers/scsi/fnic/fnic_fdls.h
> index 8e610b65ad57..531d0b37e450 100644
> --- a/drivers/scsi/fnic/fnic_fdls.h
> +++ b/drivers/scsi/fnic/fnic_fdls.h
> @@ -394,6 +394,7 @@ void fdls_send_tport_abts(struct fnic_iport_s *iport,
> bool fdls_delete_tport(struct fnic_iport_s *iport,
> struct fnic_tport_s *tport);
> void fdls_fdmi_timer_callback(struct timer_list *t);
> +void fdls_fdmi_retry_plogi(struct fnic_iport_s *iport);
>
> /* fnic_fcs.c */
> void fnic_fdls_init(struct fnic *fnic, int usefip);
Please make the version change a separate patch and add it as the last
patch in the series. That way you'll have better tracking if all patches
from that series are applied.
Cheers,
Hannes
--
Dr. Hannes Reinecke Kernel Storage Architect
hare@...e.de +49 911 74053 688
SUSE Software Solutions GmbH, Frankenstr. 146, 90461 Nürnberg
HRB 36809 (AG Nürnberg), GF: I. Totev, A. McDonald, W. Knoblich
Powered by blists - more mailing lists