[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <151903431795.8021.5069678488269142143.stgit@localhost.localdomain>
Date: Mon, 19 Feb 2018 12:58:38 +0300
From: Kirill Tkhai <ktkhai@...tuozzo.com>
To: davem@...emloft.net, nicolas.dichtel@...nd.com,
vyasevic@...hat.com, ktkhai@...tuozzo.com,
paulmck@...ux.vnet.ibm.com, vyasevich@...il.com,
mark.rutland@....com, gregkh@...uxfoundation.org,
leonro@...lanox.com, avagin@...tuozzo.com, fw@...len.de,
roman.kapl@...go.com, netdev@...r.kernel.org
Subject: [PATCH 1/3] net: Kill net_mutex
We take net_mutex, when there are !async pernet_operations
registered, and read locking of net_sem is not enough. But
we may get rid of taking the mutex, and just change the logic
to write lock net_sem in such cases. This obviously reduces
the number of lock operations, we do.
Signed-off-by: Kirill Tkhai <ktkhai@...tuozzo.com>
---
include/linux/rtnetlink.h | 1 -
include/net/net_namespace.h | 11 ++++++---
net/core/net_namespace.c | 53 ++++++++++++++++++++++++++-----------------
3 files changed, 39 insertions(+), 26 deletions(-)
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index e9ee9ad0a681..3573b4bf2fdf 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -35,7 +35,6 @@ extern int rtnl_trylock(void);
extern int rtnl_is_locked(void);
extern wait_queue_head_t netdev_unregistering_wq;
-extern struct mutex net_mutex;
extern struct rw_semaphore net_sem;
#ifdef CONFIG_PROVE_LOCKING
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 9158ec1ad06f..115b01b92f4d 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -60,8 +60,11 @@ struct net {
struct list_head list; /* list of network namespaces */
struct list_head cleanup_list; /* namespaces on death row */
- struct list_head exit_list; /* Use only net_mutex */
-
+ struct list_head exit_list; /* To linked to call pernet exit
+ * methods on dead net (net_sem
+ * read locked), or to unregister
+ * pernet ops (net_sem wr locked).
+ */
struct user_namespace *user_ns; /* Owning user namespace */
struct ucounts *ucounts;
spinlock_t nsid_lock;
@@ -89,7 +92,7 @@ struct net {
/* core fib_rules */
struct list_head rules_ops;
- struct list_head fib_notifier_ops; /* protected by net_mutex */
+ struct list_head fib_notifier_ops; /* protected by net_sem */
struct net_device *loopback_dev; /* The loopback */
struct netns_core core;
@@ -316,7 +319,7 @@ struct pernet_operations {
/*
* Indicates above methods are allowed to be executed in parallel
* with methods of any other pernet_operations, i.e. they are not
- * need synchronization via net_mutex.
+ * need write locked net_sem.
*/
bool async;
};
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index bcab9a938d6f..e89a516620dd 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -29,8 +29,6 @@
static LIST_HEAD(pernet_list);
static struct list_head *first_device = &pernet_list;
-/* Used only if there are !async pernet_operations registered */
-DEFINE_MUTEX(net_mutex);
LIST_HEAD(net_namespace_list);
EXPORT_SYMBOL_GPL(net_namespace_list);
@@ -407,6 +405,7 @@ struct net *copy_net_ns(unsigned long flags,
{
struct ucounts *ucounts;
struct net *net;
+ unsigned write;
int rv;
if (!(flags & CLONE_NEWNET))
@@ -424,20 +423,26 @@ struct net *copy_net_ns(unsigned long flags,
refcount_set(&net->passive, 1);
net->ucounts = ucounts;
get_user_ns(user_ns);
-
- rv = down_read_killable(&net_sem);
+again:
+ write = READ_ONCE(nr_sync_pernet_ops);
+ if (write)
+ rv = down_write_killable(&net_sem);
+ else
+ rv = down_read_killable(&net_sem);
if (rv < 0)
goto put_userns;
- if (nr_sync_pernet_ops) {
- rv = mutex_lock_killable(&net_mutex);
- if (rv < 0)
- goto up_read;
+
+ if (!write && unlikely(READ_ONCE(nr_sync_pernet_ops))) {
+ up_read(&net_sem);
+ goto again;
}
rv = setup_net(net, user_ns);
- if (nr_sync_pernet_ops)
- mutex_unlock(&net_mutex);
-up_read:
- up_read(&net_sem);
+
+ if (write)
+ up_write(&net_sem);
+ else
+ up_read(&net_sem);
+
if (rv < 0) {
put_userns:
put_user_ns(user_ns);
@@ -485,15 +490,23 @@ static void cleanup_net(struct work_struct *work)
struct net *net, *tmp, *last;
struct list_head net_kill_list;
LIST_HEAD(net_exit_list);
+ unsigned write;
/* Atomically snapshot the list of namespaces to cleanup */
spin_lock_irq(&cleanup_list_lock);
list_replace_init(&cleanup_list, &net_kill_list);
spin_unlock_irq(&cleanup_list_lock);
+again:
+ write = READ_ONCE(nr_sync_pernet_ops);
+ if (write)
+ down_write(&net_sem);
+ else
+ down_read(&net_sem);
- down_read(&net_sem);
- if (nr_sync_pernet_ops)
- mutex_lock(&net_mutex);
+ if (!write && unlikely(READ_ONCE(nr_sync_pernet_ops))) {
+ up_read(&net_sem);
+ goto again;
+ }
/* Don't let anyone else find us. */
rtnl_lock();
@@ -528,14 +541,14 @@ static void cleanup_net(struct work_struct *work)
list_for_each_entry_reverse(ops, &pernet_list, list)
ops_exit_list(ops, &net_exit_list);
- if (nr_sync_pernet_ops)
- mutex_unlock(&net_mutex);
-
/* Free the net generic variables */
list_for_each_entry_reverse(ops, &pernet_list, list)
ops_free_list(ops, &net_exit_list);
- up_read(&net_sem);
+ if (write)
+ up_write(&net_sem);
+ else
+ up_read(&net_sem);
/* Ensure there are no outstanding rcu callbacks using this
* network namespace.
@@ -563,8 +576,6 @@ static void cleanup_net(struct work_struct *work)
void net_ns_barrier(void)
{
down_write(&net_sem);
- mutex_lock(&net_mutex);
- mutex_unlock(&net_mutex);
up_write(&net_sem);
}
EXPORT_SYMBOL(net_ns_barrier);
Powered by blists - more mailing lists