lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date: Tue, 18 Jun 2024 09:55:44 +0100
From: John Garry <john.g.garry@...cle.com>
To: Xingui Yang <yangxingui@...wei.com>, yanaijie@...wei.com,
        jejb@...ux.ibm.com, martin.petersen@...cle.com,
        damien.lemoal@...nsource.wdc.com
Cc: linux-scsi@...r.kernel.org, linux-kernel@...r.kernel.org,
        linuxarm@...wei.com, prime.zeng@...ilicon.com,
        chenxiang66@...ilicon.com, kangfenglong@...wei.com
Subject: Re: [PATCH v3] scsi: libsas: Fix exp-attached end device cannot be
 scanned in again after probe failed

On 13/06/2024 13:23, Xingui Yang wrote:

Sorry for delay in responding and asking further questions.

> We found that it is judged as broadcast flutter when the exp-attached end
> device reconnects after probe failed, as follows:
> 
> [78779.654026] sas: broadcast received: 0
> [78779.654037] sas: REVALIDATING DOMAIN on port 0, pid:10
> [78779.654680] sas: ex 500e004aaaaaaa1f phy05 change count has changed
> [78779.662977] sas: ex 500e004aaaaaaa1f phy05 originated BROADCAST(CHANGE)
> [78779.662986] sas: ex 500e004aaaaaaa1f phy05 new device attached
> [78779.663079] sas: ex 500e004aaaaaaa1f phy05:U:8 attached: 500e004aaaaaaa05 (stp)
> [78779.693542] hisi_sas_v3_hw 0000:b4:02.0: dev[16:5] found
> [78779.701155] sas: done REVALIDATING DOMAIN on port 0, pid:10, res 0x0
> [78779.707864] sas: Enter sas_scsi_recover_host busy: 0 failed: 0
> ...
> [78835.161307] sas: --- Exit sas_scsi_recover_host: busy: 0 failed: 0 tries: 1
> [78835.171344] sas: sas_probe_sata: for exp-attached device 500e004aaaaaaa05 returned -19
> [78835.180879] hisi_sas_v3_hw 0000:b4:02.0: dev[16:5] is gone
> [78835.187487] sas: broadcast received: 0
> [78835.187504] sas: REVALIDATING DOMAIN on port 0, pid:10
> [78835.188263] sas: ex 500e004aaaaaaa1f phy05 change count has changed
> [78835.195870] sas: ex 500e004aaaaaaa1f phy05 originated BROADCAST(CHANGE)
> [78835.195875] sas: ex 500e004aaaaaaa1f rediscovering phy05
> [78835.196022] sas: ex 500e004aaaaaaa1f phy05:U:A attached: 500e004aaaaaaa05 (stp)
> [78835.196026] sas: ex 500e004aaaaaaa1f phy05 broadcast flutter
> [78835.197615] sas: done REVALIDATING DOMAIN on port 0, pid:10, res 0x0
> 
> The cause of the problem is that the related ex_phy's attached_sas_addr was
> not cleared after the end device probe failed. In order to solve the above
> problem, a function sas_ex_unregister_end_dev() is defined to clear the
> ex_phy information and unregister the end device after the exp-attached end
> device probe failed.

Can you just manually clear the ex_phy's attached_sas_addr at the 
appropiate point (along with calling sas_unregister_dev())? It seems 
that we are using heavy-handed approach in calling 
sas_unregister_devs_sas_addr(), which does the clearing and much more.

> 
> As devices may probe failed after done REVALIDATING DOMAIN when call
> sas_probe_devices(). Then after its port is added to the sas_port_del_list,
> the port will not be deleted until the end of the next REVALIDATING DOMAIN
> and sas_destruct_ports() is called. A warning about creating a duplicate
> port will occur in the new REVALIDATING DOMAIN when the end device
> reconnects. Therefore, the previous destroy_list and sas_port_del_list
> should be handled after devices probe failed.
> 
> Signed-off-by: Xingui Yang <yangxingui@...wei.com>
> ---
> Changes since v2:
> - Add a helper for calling sas_destruct_devices() and sas_destruct_ports(),
>    and put the new call at the end of sas_probe_devices() based on John's
>    suggestion.
> 
> Changes since v1:
> - Simplify the process of getting ex_phy id based on Jason's suggestion.
> - Update commit information.
> ---
>   drivers/scsi/libsas/sas_discover.c | 32 +++++++++++++++++++-----------
>   drivers/scsi/libsas/sas_expander.c |  8 ++++++++
>   drivers/scsi/libsas/sas_internal.h |  6 +++++-
>   3 files changed, 33 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/scsi/libsas/sas_discover.c b/drivers/scsi/libsas/sas_discover.c
> index 8fb7c41c0962..8c517e47d2b9 100644
> --- a/drivers/scsi/libsas/sas_discover.c
> +++ b/drivers/scsi/libsas/sas_discover.c
> @@ -17,6 +17,22 @@
>   #include <scsi/sas_ata.h>
>   #include "scsi_sas_internal.h"
>   
> +static void sas_destruct_ports(struct asd_sas_port *port)
> +{
> +	struct sas_port *sas_port, *p;
> +
> +	list_for_each_entry_safe(sas_port, p, &port->sas_port_del_list, del_list) {
> +		list_del_init(&sas_port->del_list);
> +		sas_port_delete(sas_port);
> +	}
> +}
> +
> +static void sas_destruct_devices_and_ports(struct asd_sas_port *port)

"and" in a function name never sounds right.

Can you just call sas_destruct_port(), as it takes a port arg? Maybe 
rename sas_destruct_ports() to sas_delete_ports(), as it does "delete" - 
this may avoid some confusion in names.

> +{
> +	sas_destruct_devices(port);
> +	sas_destruct_ports(port);
> +}
> +
>   /* ---------- Basic task processing for discovery purposes ---------- */
>   
>   void sas_init_dev(struct domain_device *dev)
> @@ -226,6 +242,9 @@ static void sas_probe_devices(struct asd_sas_port *port)
>   		else
>   			list_del_init(&dev->disco_list_node);
>   	}
> +
> +	/* destruct devices and ports after probe failed */
> +	sas_destruct_devices_and_ports(port);
>   }
>   
>   static void sas_suspend_devices(struct work_struct *work)
> @@ -350,16 +369,6 @@ void sas_destruct_devices(struct asd_sas_port *port)
>   	}
>   }
>   
> -static void sas_destruct_ports(struct asd_sas_port *port)
> -{
> -	struct sas_port *sas_port, *p;
> -
> -	list_for_each_entry_safe(sas_port, p, &port->sas_port_del_list, del_list) {
> -		list_del_init(&sas_port->del_list);
> -		sas_port_delete(sas_port);
> -	}
> -}
> -
>   static bool sas_abort_cmd(struct request *req, void *data)
>   {
>   	struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req);
> @@ -538,8 +547,7 @@ static void sas_revalidate_domain(struct work_struct *work)
>    out:
>   	mutex_unlock(&ha->disco_mutex);
>   
> -	sas_destruct_devices(port);
> -	sas_destruct_ports(port);
> +	sas_destruct_devices_and_ports(port);
>   	sas_probe_devices(port);
>   }
>   
> diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c
> index 4e6bb3d0f163..09be69ea09a2 100644
> --- a/drivers/scsi/libsas/sas_expander.c
> +++ b/drivers/scsi/libsas/sas_expander.c
> @@ -1878,6 +1878,14 @@ static void sas_unregister_devs_sas_addr(struct domain_device *parent,
>   	}
>   }
>   
> +void sas_ex_unregister_end_dev(struct domain_device *dev)
> +{
> +	struct domain_device *parent = dev->parent;
> +	struct sas_phy *phy = dev->phy;
> +
> +	sas_unregister_devs_sas_addr(parent, phy->number, true);
> +}
> +
>   static int sas_discover_bfs_by_root_level(struct domain_device *root,
>   					  const int level)
>   {
> diff --git a/drivers/scsi/libsas/sas_internal.h b/drivers/scsi/libsas/sas_internal.h
> index 85948963fb97..caeda847c919 100644
> --- a/drivers/scsi/libsas/sas_internal.h
> +++ b/drivers/scsi/libsas/sas_internal.h
> @@ -50,6 +50,7 @@ void sas_discover_event(struct asd_sas_port *port, enum discover_event ev);
>   
>   void sas_init_dev(struct domain_device *dev);
>   void sas_unregister_dev(struct asd_sas_port *port, struct domain_device *dev);
> +void sas_ex_unregister_end_dev(struct domain_device *dev);
>   
>   void sas_scsi_recover_host(struct Scsi_Host *shost);
>   
> @@ -145,7 +146,10 @@ static inline void sas_fail_probe(struct domain_device *dev, const char *func, i
>   		func, dev->parent ? "exp-attached" :
>   		"direct-attached",
>   		SAS_ADDR(dev->sas_addr), err);
> -	sas_unregister_dev(dev->port, dev);
> +	if (dev->parent && !dev_is_expander(dev->dev_type))
> +		sas_ex_unregister_end_dev(dev);
> +	else
> +		sas_unregister_dev(dev->port, dev);
>   }
>   
>   static inline void sas_fill_in_rphy(struct domain_device *dev,


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ