lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <1478185120-5509-1-git-send-email-john.garry@huawei.com>
Date:   Thu, 3 Nov 2016 22:58:40 +0800
From:   John Garry <john.garry@...wei.com>
To:     <martin.petersen@...cle.com>, <jejb@...ux.vnet.ibm.com>
CC:     <linux-scsi@...r.kernel.org>, <linuxarm@...wei.com>,
        <linux-kernel@...r.kernel.org>, <dan.j.williams@...el.com>,
        <john.garry2@...l.dcu.ie>, John Garry <john.garry@...wei.com>
Subject: [RFC PATCH] scsi: libsas: fix WARN on device removal

The following patch introduces an annoying WARN
when a device is removed from the SAS topology:
[SCSI] libsas: prevent domain rediscovery competing with ata error handling

A sample WARN is as follows:
[  236.842227] WARNING: CPU: 7 PID: 1520 at fs/sysfs/group.c:237 sysfs_remove_group+0x90/0x98
[  236.850465] Modules linked in:
[  236.853544]
[  236.855045] CPU: 7 PID: 1520 Comm: kworker/u64:4 Tainted: G        W       4.9.0-rc1-15310-g3fbc29e-dirty #676
[  236.865010] Hardware name: Huawei Taishan 2180 /D03, BIOS Estuary v2.3 D03 UEFI 08/17/2016
[  236.873249] Workqueue: scsi_wq_0 sas_destruct_devices
[  236.878317] task: ffff8027ba31b200 task.stack: ffff8027b9d44000
[  236.884225] PC is at sysfs_remove_group+0x90/0x98
[  236.888920] LR is at sysfs_remove_group+0x90/0x98
[  236.893616] pc : [<ffff000008256df8>] lr : [<ffff000008256df8>] pstate: 60000145
[  236.900989] sp : ffff8027b9d47bf0

< snip >

[  237.116463] [<ffff000008256df8>] sysfs_remove_group+0x90/0x98
[  237.122197] [<ffff00000851fe68>] dpm_sysfs_remove+0x58/0x68
[  237.127758] [<ffff000008513678>] device_del+0x40/0x218
[  237.132886] [<ffff000008513864>] device_unregister+0x14/0x2c
[  237.138536] [<ffff0000083670c4>] bsg_unregister_queue+0x5c/0xa0
[  237.144442] [<ffff00000855b984>] sas_rphy_remove+0x44/0x80
[  237.149915] [<ffff00000855b9d4>] sas_rphy_delete+0x14/0x28
[  237.155388] [<ffff00000855f9d8>] sas_destruct_devices+0x64/0x98
[  237.161293] [<ffff0000080d2c1c>] process_one_work+0x128/0x2e4
[  237.167027] [<ffff0000080d2e30>] worker_thread+0x58/0x434
[  237.172415] [<ffff0000080d8c24>] kthread+0xd4/0xe8
[  237.177198] [<ffff000008082e80>] ret_from_fork+0x10/0x50
[  237.182557] sysfs group 'power' not found for kobject 'end_device-0:0:5'

(this can be really huge when an expander is unplugged)

The problem is with the process of sas_port and domain_device
destruction in domain revalidation. There is a 2-stage process:
In domain revalidation (which runs in work queue context), if a
domain_device is discovered to be gone, then the following happens:
- the domain_device is queued for destruction in a separate work item
- the associated sas_port is destroyed immediately

This causes a problem in that the sas_port associated with
a domain_device is destroyed prior the domain_device: this causes
the sysfs WARN. Essentially the "rug has been pulled from underneath".

Also, likewise, when a root port is deformed due to loss of signal,
we have the same issue.

To solve, destroy the sas_port in a separate work item to which
we do the domain revalidation with a new discovery event, as follows:
- When a domain_device is detected to be gone, the domain_device is
  queued for destruction in a separate work item. The associated
  sas_port is also queued for destruction in another separate work item
  (needs to be queued 2nd)
- the domain_device is destroyed
- the sas_port is destroyed
[similar is done for loss of signal event, in sas_port_deformed()].

Fixes: 87c8331fcf72e501c3a3c0cdc5c [SCSI] libsas: prevent domain
rediscovery competing with ata error handling

Signed-off-by: John Garry <john.garry@...wei.com>

diff --git a/drivers/scsi/libsas/sas_discover.c b/drivers/scsi/libsas/sas_discover.c
index 60de662..01d0fe2 100644
--- a/drivers/scsi/libsas/sas_discover.c
+++ b/drivers/scsi/libsas/sas_discover.c
@@ -361,7 +361,7 @@ static void sas_destruct_devices(struct work_struct *work)
 
 	clear_bit(DISCE_DESTRUCT, &port->disc.pending);
 
-	list_for_each_entry_safe(dev, n, &port->destroy_list, disco_list_node) {
+	list_for_each_entry_safe(dev, n, &port->dev_destroy_list, disco_list_node) {
 		list_del_init(&dev->disco_list_node);
 
 		sas_remove_children(&dev->rphy->dev);
@@ -383,7 +383,7 @@ void sas_unregister_dev(struct asd_sas_port *port, struct domain_device *dev)
 
 	if (!test_and_set_bit(SAS_DEV_DESTROY, &dev->state)) {
 		sas_rphy_unlink(dev->rphy);
-		list_move_tail(&dev->disco_list_node, &port->destroy_list);
+		list_move_tail(&dev->disco_list_node, &port->dev_destroy_list);
 		sas_discover_event(dev->port, DISCE_DESTRUCT);
 	}
 }
@@ -525,6 +525,28 @@ static void sas_revalidate_domain(struct work_struct *work)
 	mutex_unlock(&ha->disco_mutex);
 }
 
+/* ---------- Async Port destruct ---------- */
+static void sas_async_port_destruct(struct work_struct *work)
+{
+	struct sas_discovery_event *ev = to_sas_discovery_event(work);
+	struct asd_sas_port *port = ev->port;
+	struct sas_port *sas_port, *n;
+
+	clear_bit(DISCE_PORT_DESTRUCT, &port->disc.pending);
+
+	list_for_each_entry_safe(sas_port, n, &port->port_destroy_list, destroy_list) {
+		list_del_init(&port->port_destroy_list);
+
+		sas_port_delete(sas_port);
+	}
+}
+
+void sas_port_destruct(struct asd_sas_port *port, struct sas_port *sas_port)
+{
+	list_move_tail(&sas_port->destroy_list, &port->port_destroy_list);
+	sas_discover_event(port, DISCE_PORT_DESTRUCT);
+}
+
 /* ---------- Events ---------- */
 
 static void sas_chain_work(struct sas_ha_struct *ha, struct sas_work *sw)
@@ -582,6 +604,7 @@ void sas_init_disc(struct sas_discovery *disc, struct asd_sas_port *port)
 		[DISCE_SUSPEND] = sas_suspend_devices,
 		[DISCE_RESUME] = sas_resume_devices,
 		[DISCE_DESTRUCT] = sas_destruct_devices,
+		[DISCE_PORT_DESTRUCT] = sas_async_port_destruct,
 	};
 
 	disc->pending = 0;
diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c
index 022bb6e..f9522a0 100644
--- a/drivers/scsi/libsas/sas_expander.c
+++ b/drivers/scsi/libsas/sas_expander.c
@@ -1900,10 +1900,11 @@ static void sas_unregister_devs_sas_addr(struct domain_device *parent,
 	}
 	memset(phy->attached_sas_addr, 0, SAS_ADDR_SIZE);
 	if (phy->port) {
+		struct asd_sas_port *port = found->port;
 		sas_port_delete_phy(phy->port, phy->phy);
 		sas_device_set_phy(found, phy->port);
 		if (phy->port->num_phys == 0)
-			sas_port_delete(phy->port);
+			sas_port_destruct(port, phy->port);
 		phy->port = NULL;
 	}
 }
diff --git a/drivers/scsi/libsas/sas_port.c b/drivers/scsi/libsas/sas_port.c
index d3c5297..1a32f86 100644
--- a/drivers/scsi/libsas/sas_port.c
+++ b/drivers/scsi/libsas/sas_port.c
@@ -219,7 +219,7 @@ void sas_deform_port(struct asd_sas_phy *phy, int gone)
 
 	if (port->num_phys == 1) {
 		sas_unregister_domain_devices(port, gone);
-		sas_port_delete(port->port);
+		sas_port_destruct(port, port->port);
 		port->port = NULL;
 	} else {
 		sas_port_delete_phy(port->port, phy->phy);
@@ -322,7 +322,8 @@ static void sas_init_port(struct asd_sas_port *port,
 	port->id = i;
 	INIT_LIST_HEAD(&port->dev_list);
 	INIT_LIST_HEAD(&port->disco_list);
-	INIT_LIST_HEAD(&port->destroy_list);
+	INIT_LIST_HEAD(&port->dev_destroy_list);
+	INIT_LIST_HEAD(&port->port_destroy_list);
 	spin_lock_init(&port->phy_list_lock);
 	INIT_LIST_HEAD(&port->phy_list);
 	port->ha = sas_ha;
diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c
index 60b651b..062c03c 100644
--- a/drivers/scsi/scsi_transport_sas.c
+++ b/drivers/scsi/scsi_transport_sas.c
@@ -934,6 +934,7 @@ struct sas_port *sas_port_alloc(struct device *parent, int port_id)
 
 	mutex_init(&port->phy_list_mutex);
 	INIT_LIST_HEAD(&port->phy_list);
+	INIT_LIST_HEAD(&port->destroy_list);
 
 	if (scsi_is_sas_expander_device(parent)) {
 		struct sas_rphy *rphy = dev_to_rphy(parent);
diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h
index dae99d7..a7953c8 100644
--- a/include/scsi/libsas.h
+++ b/include/scsi/libsas.h
@@ -91,7 +91,8 @@ enum discover_event {
 	DISCE_SUSPEND		= 4,
 	DISCE_RESUME		= 5,
 	DISCE_DESTRUCT		= 6,
-	DISC_NUM_EVENTS		= 7,
+	DISCE_PORT_DESTRUCT	= 7,
+	DISC_NUM_EVENTS,
 };
 
 /* ---------- Expander Devices ---------- */
@@ -268,7 +269,8 @@ struct asd_sas_port {
 	spinlock_t dev_list_lock;
 	struct list_head dev_list;
 	struct list_head disco_list;
-	struct list_head destroy_list;
+	struct list_head dev_destroy_list;
+	struct list_head port_destroy_list;
 	enum   sas_linkrate linkrate;
 
 	struct sas_work work;
@@ -702,6 +704,7 @@ extern int sas_bios_param(struct scsi_device *,
 int  sas_ex_revalidate_domain(struct domain_device *);
 
 void sas_unregister_domain_devices(struct asd_sas_port *port, int gone);
+void sas_port_destruct(struct asd_sas_port *port, struct sas_port *sas_port);
 void sas_init_disc(struct sas_discovery *disc, struct asd_sas_port *);
 int  sas_discover_event(struct asd_sas_port *, enum discover_event ev);
 
diff --git a/include/scsi/scsi_transport_sas.h b/include/scsi/scsi_transport_sas.h
index 73d8709..b495aac 100644
--- a/include/scsi/scsi_transport_sas.h
+++ b/include/scsi/scsi_transport_sas.h
@@ -154,6 +154,7 @@ struct sas_port {
 
 	struct mutex		phy_list_mutex;
 	struct list_head	phy_list;
+	struct list_head	destroy_list; /* only used by libsas */
 };
 
 #define dev_to_sas_port(d) \
-- 
1.9.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ