Message-ID: <4B5F82C1.6050108@cisco.com>
Date: Tue, 26 Jan 2010 16:03:13 -0800
From: Joe Eykholt <jeykholt@...co.com>
To: Greg KH <gregkh@...e.de>
CC: linux-kernel@...r.kernel.org, stable@...nel.org,
stable-review@...nel.org, torvalds@...ux-foundation.org,
akpm@...ux-foundation.org, alan@...rguk.ukuu.org.uk,
Robert Love <robert.w.love@...el.com>,
James Bottomley <James.Bottomley@...e.de>
Subject: Re: [50/98] [SCSI] libfc: fix free of fc_rport_priv with timer pending
Greg KH wrote:
> 2.6.32-stable review patch. If anyone has any objections, please let us know.
>
> ------------------
>
> From: Joe Eykholt <jeykholt@...co.com>
>
> commit b4a9c7ede96e90f7b1ec009ce7256059295e76df upstream.
>
> Timer crashes were caused by freeing a struct fc_rport_priv
> with a timer pending, causing the timer facility list to be
> corrupted. This was during FC uplink flap tests with a lot
> of targets.
>
> After discovery, we were doing a PLOGI on an rdata that was
> in DELETE state but not yet removed from the lookup list.
> This moved the rdata from DELETE state to PLOGI state.
> If the PLOGI exchange allocation failed and needed to be
> retried, the timer scheduling could race with the free
> being done by fc_rport_work().
>
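To spell the race out (this is my reading of the 2.6.32 code, so take
the helper names below, e.g. fc_rport_error_retry() and retry_work, as
illustrative rather than authoritative):

	fc_rport_login() on an rdata still in DELETE state
	  -> fc_rport_enter_plogi()
	     exchange allocation fails
	  -> fc_rport_error_retry() schedules rdata->retry_work,
	     leaving a timer pending
	...meanwhile fc_rport_work(), already queued for the delete, runs
	  -> kref_put(&rdata->kref, lport->tt.rport_destroy)
	     frees the rdata while that timer is still pending
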
> When fc_rport_login() is called on an rport in DELETE state,
> move it to the new RESTART state. In fc_rport_work(), when
> handling a LOGO, STOPPED, or FAILED event, check for the
> RESTART state. In the RESTART case, don't take the rdata off
> the list; after the transport remote port is deleted and
> exchanges are reset, re-login to the remote port.
>
> Note that the new RESTART state also corrects a problem we
> had when re-discovering a port that had moved to DELETE state.
> In that case, a new rdata was created, but the old rdata
> would do an exchange manager reset affecting the FC_ID
> for both the new rdata and old rdata. With the new state,
> the new port isn't logged into until after any old exchanges
> are reset.
>
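For other reviewers, the RESTART transitions added below boil down to
the following (my summary of the diff, not text from the commit):

	DELETE  -- fc_rport_login() ------------------> RESTART
	RESTART -- fc_rport_work() on a LOGO, STOPPED,
	           or FAILED event -------------------> rdata kept on the
	           discovery list; transport rport deleted and exchanges
	           reset, then fc_rport_enter_plogi()
	LOGO received with disc_id set ---------------> LOGO event queued
	           and state set to RESTART so the port is logged back
	           in afterwards
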
> Signed-off-by: Joe Eykholt <jeykholt@...co.com>
> Signed-off-by: Robert Love <robert.w.love@...el.com>
> Signed-off-by: James Bottomley <James.Bottomley@...e.de>
> Signed-off-by: Greg Kroah-Hartman <gregkh@...e.de>
>
> ---
> drivers/scsi/libfc/fc_rport.c | 69 ++++++++++++++++++++++++++++++------------
> include/scsi/libfc.h | 1
> 2 files changed, 51 insertions(+), 19 deletions(-)
>
> --- a/drivers/scsi/libfc/fc_rport.c
> +++ b/drivers/scsi/libfc/fc_rport.c
> @@ -86,6 +86,7 @@ static const char *fc_rport_state_names[
> [RPORT_ST_LOGO] = "LOGO",
> [RPORT_ST_ADISC] = "ADISC",
> [RPORT_ST_DELETE] = "Delete",
> + [RPORT_ST_RESTART] = "Restart",
> };
>
> /**
> @@ -99,8 +100,7 @@ static struct fc_rport_priv *fc_rport_lo
> struct fc_rport_priv *rdata;
>
> list_for_each_entry(rdata, &lport->disc.rports, peers)
> - if (rdata->ids.port_id == port_id &&
> - rdata->rp_state != RPORT_ST_DELETE)
> + if (rdata->ids.port_id == port_id)
> return rdata;
> return NULL;
> }
> @@ -235,6 +235,7 @@ static void fc_rport_work(struct work_st
> struct fc_rport_operations *rport_ops;
> struct fc_rport_identifiers ids;
> struct fc_rport *rport;
> + int restart = 0;
>
> mutex_lock(&rdata->rp_mutex);
> event = rdata->event;
> @@ -287,8 +288,19 @@ static void fc_rport_work(struct work_st
> mutex_unlock(&rdata->rp_mutex);
>
> if (port_id != FC_FID_DIR_SERV) {
> + /*
> + * We must drop rp_mutex before taking disc_mutex.
> + * Re-evaluate state to allow for restart.
> + * A transition to RESTART state must only happen
> + * while disc_mutex is held and rdata is on the list.
> + */
> mutex_lock(&lport->disc.disc_mutex);
> - list_del(&rdata->peers);
> + mutex_lock(&rdata->rp_mutex);
> + if (rdata->rp_state == RPORT_ST_RESTART)
> + restart = 1;
> + else
> + list_del(&rdata->peers);
There is a follow-up patch that adds this line at this point:

	rdata->event = RPORT_EV_NONE;

If this patch is integrated, that one should be integrated
as well.  That patch is:

commit 5543c72e2bbb30e5ba5938b18ec26617b8b3fb04
Author: Abhijeet Joglekar <abjoglek@...co.com>
Date:   Thu Dec 10 09:59:20 2009 -0800

    [SCSI] libfc: remote port gets stuck in restart state without really restarting
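
For reference, with that follow-up applied the hunk above would read
roughly like this (context abridged from the hunk; the placement of
the added line is from memory, so treat it as illustrative). As I
understand it, without clearing the event here a later event won't
re-queue the event work, and the port sits in RESTART forever, which
is what that follow-up's subject describes:

	if (port_id != FC_FID_DIR_SERV) {
		mutex_lock(&lport->disc.disc_mutex);
		mutex_lock(&rdata->rp_mutex);
		if (rdata->rp_state == RPORT_ST_RESTART)
			restart = 1;
		else
			list_del(&rdata->peers);
		rdata->event = RPORT_EV_NONE;	/* added by the follow-up */
		mutex_unlock(&rdata->rp_mutex);
		mutex_unlock(&lport->disc.disc_mutex);
	}
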
Joe
> + mutex_unlock(&rdata->rp_mutex);
> mutex_unlock(&lport->disc.disc_mutex);
> }
>
> @@ -312,7 +324,13 @@ static void fc_rport_work(struct work_st
> mutex_unlock(&rdata->rp_mutex);
> fc_remote_port_delete(rport);
> }
> - kref_put(&rdata->kref, lport->tt.rport_destroy);
> + if (restart) {
> + mutex_lock(&rdata->rp_mutex);
> + FC_RPORT_DBG(rdata, "work restart\n");
> + fc_rport_enter_plogi(rdata);
> + mutex_unlock(&rdata->rp_mutex);
> + } else
> + kref_put(&rdata->kref, lport->tt.rport_destroy);
> break;
>
> default:
> @@ -342,6 +360,12 @@ int fc_rport_login(struct fc_rport_priv
> FC_RPORT_DBG(rdata, "ADISC port\n");
> fc_rport_enter_adisc(rdata);
> break;
> + case RPORT_ST_RESTART:
> + break;
> + case RPORT_ST_DELETE:
> + FC_RPORT_DBG(rdata, "Restart deleted port\n");
> + fc_rport_state_enter(rdata, RPORT_ST_RESTART);
> + break;
> default:
> FC_RPORT_DBG(rdata, "Login to port\n");
> fc_rport_enter_plogi(rdata);
> @@ -397,20 +421,21 @@ int fc_rport_logoff(struct fc_rport_priv
>
> if (rdata->rp_state == RPORT_ST_DELETE) {
> FC_RPORT_DBG(rdata, "Port in Delete state, not removing\n");
> - mutex_unlock(&rdata->rp_mutex);
> goto out;
> }
>
> - fc_rport_enter_logo(rdata);
> + if (rdata->rp_state == RPORT_ST_RESTART)
> + FC_RPORT_DBG(rdata, "Port in Restart state, deleting\n");
> + else
> + fc_rport_enter_logo(rdata);
>
> /*
> * Change the state to Delete so that we discard
> * the response.
> */
> fc_rport_enter_delete(rdata, RPORT_EV_STOP);
> - mutex_unlock(&rdata->rp_mutex);
> -
> out:
> + mutex_unlock(&rdata->rp_mutex);
> return 0;
> }
>
> @@ -466,6 +491,7 @@ static void fc_rport_timeout(struct work
> case RPORT_ST_READY:
> case RPORT_ST_INIT:
> case RPORT_ST_DELETE:
> + case RPORT_ST_RESTART:
> break;
> }
>
> @@ -499,6 +525,7 @@ static void fc_rport_error(struct fc_rpo
> fc_rport_enter_logo(rdata);
> break;
> case RPORT_ST_DELETE:
> + case RPORT_ST_RESTART:
> case RPORT_ST_READY:
> case RPORT_ST_INIT:
> break;
> @@ -1248,6 +1275,7 @@ static void fc_rport_recv_plogi_req(stru
> }
> break;
> case RPORT_ST_PRLI:
> + case RPORT_ST_RTV:
> case RPORT_ST_READY:
> case RPORT_ST_ADISC:
> FC_RPORT_DBG(rdata, "Received PLOGI in logged-in state %d "
> @@ -1255,11 +1283,14 @@ static void fc_rport_recv_plogi_req(stru
> /* XXX TBD - should reset */
> break;
> case RPORT_ST_DELETE:
> - default:
> - FC_RPORT_DBG(rdata, "Received PLOGI in unexpected state %d\n",
> - rdata->rp_state);
> - fc_frame_free(rx_fp);
> - goto out;
> + case RPORT_ST_LOGO:
> + case RPORT_ST_RESTART:
> + FC_RPORT_DBG(rdata, "Received PLOGI in state %s - send busy\n",
> + fc_rport_state(rdata));
> + mutex_unlock(&rdata->rp_mutex);
> + rjt_data.reason = ELS_RJT_BUSY;
> + rjt_data.explan = ELS_EXPL_NONE;
> + goto reject;
> }
>
> /*
> @@ -1510,14 +1541,14 @@ static void fc_rport_recv_logo_req(struc
> FC_RPORT_DBG(rdata, "Received LOGO request while in state %s\n",
> fc_rport_state(rdata));
>
> + fc_rport_enter_delete(rdata, RPORT_EV_LOGO);
> +
> /*
> - * If the remote port was created due to discovery,
> - * log back in. It may have seen a stale RSCN about us.
> + * If the remote port was created due to discovery, set state
> + * to log back in. It may have seen a stale RSCN about us.
> */
> - if (rdata->rp_state != RPORT_ST_DELETE && rdata->disc_id)
> - fc_rport_enter_plogi(rdata);
> - else
> - fc_rport_enter_delete(rdata, RPORT_EV_LOGO);
> + if (rdata->disc_id)
> + fc_rport_state_enter(rdata, RPORT_ST_RESTART);
> mutex_unlock(&rdata->rp_mutex);
> } else
> FC_RPORT_ID_DBG(lport, sid,
> --- a/include/scsi/libfc.h
> +++ b/include/scsi/libfc.h
> @@ -145,6 +145,7 @@ enum fc_rport_state {
> RPORT_ST_LOGO, /* port logout sent */
> RPORT_ST_ADISC, /* Discover Address sent */
> RPORT_ST_DELETE, /* port being deleted */
> + RPORT_ST_RESTART, /* remote port being deleted and will restart */
> };
>
> /**
>
>