[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1431253604-9214-2-git-send-email-haggaie@mellanox.com>
Date: Sun, 10 May 2015 13:26:32 +0300
From: Haggai Eran <haggaie@...lanox.com>
To: Doug Ledford <dledford@...hat.com>
Cc: linux-rdma@...r.kernel.org, netdev@...r.kernel.org,
Liran Liss <liranl@...lanox.com>,
Guy Shapiro <guysh@...lanox.com>,
Shachar Raindel <raindel@...lanox.com>,
Yotam Kenneth <yotamke@...lanox.com>,
Haggai Eran <haggaie@...lanox.com>,
Matan Barak <matanb@...lanox.com>,
Jason Gunthorpe <jgunthorpe@...idianresearch.com>
Subject: [PATCH v3 for-next 01/13] IB/core: Use SRCU when reading client_list or device_list
Currently the RDMA subsystem's device list and client list are protected by
a single mutex. This prevents adding user-facing APIs that iterate these
lists, since calling such an API while the mutex is already held (for
example from a client's add/remove callback) would deadlock. This patch
attempts to solve the problem by adding an SRCU to protect the lists.
Readers no longer need the mutex; it is enough to iterate inside an
srcu_read_lock()/srcu_read_unlock() section.
The ib_register_device, ib_register_client, and ib_unregister_client
functions are modified to hold device_mutex only while modifying their
respective list, and to iterate over the other list under the SRCU read
lock. In ib_unregister_device, the client list iteration remains inside the
mutex critical section because it is done in reverse order, and an
RCU-protected list can only be traversed safely in the forward direction.
This patch attempts to address a need similar to the one [1] that was seen
in the RoCE v2 patch series.
[1] http://www.spinics.net/lists/linux-rdma/msg24733.html
Cc: Matan Barak <matanb@...lanox.com>
Cc: Jason Gunthorpe <jgunthorpe@...idianresearch.com>
Signed-off-by: Haggai Eran <haggaie@...lanox.com>
---
drivers/infiniband/core/device.c | 75 ++++++++++++++++++++++++++++++----------
1 file changed, 56 insertions(+), 19 deletions(-)
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index b360350a0b20..7d90b2ca2eba 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -58,12 +58,11 @@ EXPORT_SYMBOL_GPL(ib_wq);
static LIST_HEAD(device_list);
static LIST_HEAD(client_list);
+/* device_srcu protects access to both device_list and client_list. */
+static struct srcu_struct device_srcu;
+
/*
- * device_mutex protects access to both device_list and client_list.
- * There's no real point to using multiple locks or something fancier
- * like an rwsem: we always access both lists, and we're always
- * modifying one list or the other list. In any case this is not a
- * hot path so there's no point in trying to optimize.
+ * device_mutex protects writer access to both device_list and client_list.
*/
static DEFINE_MUTEX(device_mutex);
@@ -276,6 +275,7 @@ int ib_register_device(struct ib_device *device,
u8, struct kobject *))
{
int ret;
+ int id;
mutex_lock(&device_mutex);
@@ -315,13 +315,19 @@ int ib_register_device(struct ib_device *device,
device->reg_state = IB_DEV_REGISTERED;
+ mutex_unlock(&device_mutex);
+
+ id = srcu_read_lock(&device_srcu);
{
struct ib_client *client;
- list_for_each_entry(client, &client_list, list)
+ list_for_each_entry_rcu(client, &client_list, list)
if (client->add && !add_client_context(device, client))
client->add(device);
}
+ srcu_read_unlock(&device_srcu, id);
+
+ return 0;
out:
mutex_unlock(&device_mutex);
@@ -338,6 +344,7 @@ EXPORT_SYMBOL(ib_register_device);
void ib_unregister_device(struct ib_device *device)
{
struct ib_client *client;
+ LIST_HEAD(contexts);
struct ib_client_data *context, *tmp;
unsigned long flags;
@@ -347,21 +354,26 @@ void ib_unregister_device(struct ib_device *device)
if (client->remove)
client->remove(device);
- list_del(&device->core_list);
+ list_del_rcu(&device->core_list);
+
+ mutex_unlock(&device_mutex);
+
+ synchronize_srcu(&device_srcu);
kfree(device->gid_tbl_len);
kfree(device->pkey_tbl_len);
- mutex_unlock(&device_mutex);
-
ib_device_unregister_sysfs(device);
spin_lock_irqsave(&device->client_data_lock, flags);
- list_for_each_entry_safe(context, tmp, &device->client_data_list, list)
- kfree(context);
+ list_cut_position(&contexts, &device->client_data_list,
+ device->client_data_list.prev);
spin_unlock_irqrestore(&device->client_data_lock, flags);
device->reg_state = IB_DEV_UNREGISTERED;
+
+ list_for_each_entry_safe(context, tmp, &contexts, list)
+ kfree(context);
}
EXPORT_SYMBOL(ib_unregister_device);
@@ -381,15 +393,19 @@ EXPORT_SYMBOL(ib_unregister_device);
int ib_register_client(struct ib_client *client)
{
struct ib_device *device;
+ int id;
mutex_lock(&device_mutex);
+ list_add_tail_rcu(&client->list, &client_list);
+ mutex_unlock(&device_mutex);
- list_add_tail(&client->list, &client_list);
- list_for_each_entry(device, &device_list, core_list)
+ id = srcu_read_lock(&device_srcu);
+
+ list_for_each_entry_rcu(device, &device_list, core_list)
if (client->add && !add_client_context(device, client))
client->add(device);
- mutex_unlock(&device_mutex);
+ srcu_read_unlock(&device_srcu, id);
return 0;
}
@@ -407,11 +423,13 @@ void ib_unregister_client(struct ib_client *client)
{
struct ib_client_data *context, *tmp;
struct ib_device *device;
+ LIST_HEAD(contexts);
unsigned long flags;
+ int id;
- mutex_lock(&device_mutex);
+ id = srcu_read_lock(&device_srcu);
- list_for_each_entry(device, &device_list, core_list) {
+ list_for_each_entry_rcu(device, &device_list, core_list) {
if (client->remove)
client->remove(device);
@@ -419,13 +437,21 @@ void ib_unregister_client(struct ib_client *client)
list_for_each_entry_safe(context, tmp, &device->client_data_list, list)
if (context->client == client) {
list_del(&context->list);
- kfree(context);
+ list_add(&context->list, &contexts);
}
spin_unlock_irqrestore(&device->client_data_lock, flags);
}
- list_del(&client->list);
+ srcu_read_unlock(&device_srcu, id);
+
+ mutex_lock(&device_mutex);
+ list_del_rcu(&client->list);
mutex_unlock(&device_mutex);
+
+ synchronize_srcu(&device_srcu);
+
+ list_for_each_entry_safe(context, tmp, &contexts, list)
+ kfree(context);
}
EXPORT_SYMBOL(ib_unregister_client);
@@ -738,9 +764,15 @@ static int __init ib_core_init(void)
{
int ret;
+ ret = init_srcu_struct(&device_srcu);
+ if (ret) {
+ pr_warn("Couldn't initialize SRCU\n");
+ return ret;
+ }
+
ib_wq = alloc_workqueue("infiniband", 0, 0);
if (!ib_wq)
- return -ENOMEM;
+ goto err_srcu;
ret = ib_sysfs_setup();
if (ret) {
@@ -770,6 +802,9 @@ err_sysfs:
err:
destroy_workqueue(ib_wq);
+err_srcu:
+ cleanup_srcu_struct(&device_srcu);
+
return ret;
}
@@ -780,6 +815,8 @@ static void __exit ib_core_cleanup(void)
ib_sysfs_cleanup();
/* Make sure that any pending umem accounting work is done. */
destroy_workqueue(ib_wq);
+ srcu_barrier(&device_srcu);
+ cleanup_srcu_struct(&device_srcu);
}
module_init(ib_core_init);
--
1.7.11.2
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists