[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <1426642379-20503-2-git-send-email-ying.xue@windriver.com>
Date: Wed, 18 Mar 2015 09:32:57 +0800
From: Ying Xue <ying.xue@...driver.com>
To: <davem@...emloft.net>
CC: <jon.maloy@...csson.com>, <cwang@...pensource.com>,
<erik.hugne@...csson.com>, <netdev@...r.kernel.org>,
<tipc-discussion@...ts.sourceforge.net>
Subject: [PATCH net-next 1/3] tipc: fix netns refcnt leak
When the TIPC module is loaded, we launch a topology server in kernel
space, which in its turn is creating TIPC sockets for communication
with topology server users. Because both the socket's creator and
provider reside in the same module, it is necessary that the TIPC
module's reference count remains zero after the server is started and
the socket created; otherwise it becomes impossible to perform "rmmod"
even on an idle module.
Currently, we achieve this by defining a separate "tipc_proto_kern"
protocol struct, that is used only for kernel space socket allocations.
This structure has the "owner" field set to NULL, which restricts the
module reference count from being be bumped when sk_alloc() for local
sockets is called. Furthermore, we have defined three kernel-specific
functions, tipc_sock_create_local(), tipc_sock_release_local() and
tipc_sock_accept_local(), to avoid the module counter being modified
when module local sockets are created or deleted. This has worked well
until we introduced name space support.
However, after name space support was introduced, we have observed that
a reference count leak occurs, because the netns counter is not
decremented in tipc_sock_delete_local().
This commit remedies this problem. But instead of just modifying
tipc_sock_delete_local(), we eliminate the whole parallel socket
handling infrastructure, and start using the regular sk_create_kern(),
kernel_accept() and sk_release_kernel() calls. Since those functions
manipulate the module counter, we must now compensate for that by
explicitly decrementing the counter after module local sockets are
created, and increment it just before calling sk_release_kernel().
Fixes: a62fbccecd62 ("tipc: make subscriber server support net namespace")
Signed-off-by: Ying Xue <ying.xue@...driver.com>
Reviewed-by: Jon Maloy <jon.maloy@...cson.com>
Reviewed-by: Erik Hugne <erik.hugne@...csson.com>
Reported-by: Cong Wang <cwang@...pensource.com>
Tested-by: Erik Hugne <erik.hugne@...csson.com>
---
net/tipc/server.c | 44 ++++++++++++++++++++++++----
net/tipc/socket.c | 83 +----------------------------------------------------
net/tipc/socket.h | 4 ---
3 files changed, 39 insertions(+), 92 deletions(-)
diff --git a/net/tipc/server.c b/net/tipc/server.c
index eadd4ed..a57c840 100644
--- a/net/tipc/server.c
+++ b/net/tipc/server.c
@@ -37,11 +37,13 @@
#include "core.h"
#include "socket.h"
#include <net/sock.h>
+#include <linux/module.h>
/* Number of messages to send before rescheduling */
#define MAX_SEND_MSG_COUNT 25
#define MAX_RECV_MSG_COUNT 25
#define CF_CONNECTED 1
+#define CF_SERVER 2
#define sock2con(x) ((struct tipc_conn *)(x)->sk_user_data)
@@ -88,9 +90,16 @@ static void tipc_clean_outqueues(struct tipc_conn *con);
static void tipc_conn_kref_release(struct kref *kref)
{
struct tipc_conn *con = container_of(kref, struct tipc_conn, kref);
+ struct socket *sock = con->sock;
+ struct sock *sk;
- if (con->sock) {
- tipc_sock_release_local(con->sock);
+ if (sock) {
+ sk = sock->sk;
+ if (test_bit(CF_SERVER, &con->flags)) {
+ __module_get(sock->ops->owner);
+ __module_get(sk->sk_prot_creator->owner);
+ }
+ sk_release_kernel(sk);
con->sock = NULL;
}
@@ -281,7 +290,7 @@ static int tipc_accept_from_sock(struct tipc_conn *con)
struct tipc_conn *newcon;
int ret;
- ret = tipc_sock_accept_local(sock, &newsock, O_NONBLOCK);
+ ret = kernel_accept(sock, &newsock, O_NONBLOCK);
if (ret < 0)
return ret;
@@ -309,9 +318,12 @@ static struct socket *tipc_create_listen_sock(struct tipc_conn *con)
struct socket *sock = NULL;
int ret;
- ret = tipc_sock_create_local(s->net, s->type, &sock);
+ ret = sock_create_kern(AF_TIPC, SOCK_SEQPACKET, 0, &sock);
if (ret < 0)
return NULL;
+
+ sk_change_net(sock->sk, s->net);
+
ret = kernel_setsockopt(sock, SOL_TIPC, TIPC_IMPORTANCE,
(char *)&s->imp, sizeof(s->imp));
if (ret < 0)
@@ -337,11 +349,31 @@ static struct socket *tipc_create_listen_sock(struct tipc_conn *con)
pr_err("Unknown socket type %d\n", s->type);
goto create_err;
}
+
+ /* As server's listening socket owner and creator is the same module,
+ * we have to decrease TIPC module reference count to guarantee that
+ * it remains zero after the server socket is created, otherwise,
+ * executing "rmmod" command is unable to make TIPC module deleted
+ * after TIPC module is inserted successfully.
+ *
+ * However, the reference count is ever increased twice in
+ * sock_create_kern(): one is to increase the reference count of owner
+ * of TIPC socket's proto_ops struct; another is to increment the
+ * reference count of owner of TIPC proto struct. Therefore, we must
+ * decrement the module reference count twice to ensure that it keeps
+ * zero after server's listening socket is created. Of course, we
+ * must bump the module reference count twice as well before the socket
+ * is closed.
+ */
+ module_put(sock->ops->owner);
+ module_put(sock->sk->sk_prot_creator->owner);
+ set_bit(CF_SERVER, &con->flags);
+
return sock;
create_err:
- sock_release(sock);
- con->sock = NULL;
+ kernel_sock_shutdown(sock, SHUT_RDWR);
+ sk_release_kernel(sock->sk);
return NULL;
}
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 934947f..813847d 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -121,9 +121,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz);
static const struct proto_ops packet_ops;
static const struct proto_ops stream_ops;
static const struct proto_ops msg_ops;
-
static struct proto tipc_proto;
-static struct proto tipc_proto_kern;
static const struct nla_policy tipc_nl_sock_policy[TIPC_NLA_SOCK_MAX + 1] = {
[TIPC_NLA_SOCK_UNSPEC] = { .type = NLA_UNSPEC },
@@ -341,11 +339,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock,
}
/* Allocate socket's protocol area */
- if (!kern)
- sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto);
- else
- sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto_kern);
-
+ sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto);
if (sk == NULL)
return -ENOMEM;
@@ -383,75 +377,6 @@ static int tipc_sk_create(struct net *net, struct socket *sock,
return 0;
}
-/**
- * tipc_sock_create_local - create TIPC socket from inside TIPC module
- * @type: socket type - SOCK_RDM or SOCK_SEQPACKET
- *
- * We cannot use sock_creat_kern here because it bumps module user count.
- * Since socket owner and creator is the same module we must make sure
- * that module count remains zero for module local sockets, otherwise
- * we cannot do rmmod.
- *
- * Returns 0 on success, errno otherwise
- */
-int tipc_sock_create_local(struct net *net, int type, struct socket **res)
-{
- int rc;
-
- rc = sock_create_lite(AF_TIPC, type, 0, res);
- if (rc < 0) {
- pr_err("Failed to create kernel socket\n");
- return rc;
- }
- tipc_sk_create(net, *res, 0, 1);
-
- return 0;
-}
-
-/**
- * tipc_sock_release_local - release socket created by tipc_sock_create_local
- * @sock: the socket to be released.
- *
- * Module reference count is not incremented when such sockets are created,
- * so we must keep it from being decremented when they are released.
- */
-void tipc_sock_release_local(struct socket *sock)
-{
- tipc_release(sock);
- sock->ops = NULL;
- sock_release(sock);
-}
-
-/**
- * tipc_sock_accept_local - accept a connection on a socket created
- * with tipc_sock_create_local. Use this function to avoid that
- * module reference count is inadvertently incremented.
- *
- * @sock: the accepting socket
- * @newsock: reference to the new socket to be created
- * @flags: socket flags
- */
-
-int tipc_sock_accept_local(struct socket *sock, struct socket **newsock,
- int flags)
-{
- struct sock *sk = sock->sk;
- int ret;
-
- ret = sock_create_lite(sk->sk_family, sk->sk_type,
- sk->sk_protocol, newsock);
- if (ret < 0)
- return ret;
-
- ret = tipc_accept(sock, *newsock, flags);
- if (ret < 0) {
- sock_release(*newsock);
- return ret;
- }
- (*newsock)->ops = sock->ops;
- return ret;
-}
-
static void tipc_sk_callback(struct rcu_head *head)
{
struct tipc_sock *tsk = container_of(head, struct tipc_sock, rcu);
@@ -2608,12 +2533,6 @@ static struct proto tipc_proto = {
.sysctl_rmem = sysctl_tipc_rmem
};
-static struct proto tipc_proto_kern = {
- .name = "TIPC",
- .obj_size = sizeof(struct tipc_sock),
- .sysctl_rmem = sysctl_tipc_rmem
-};
-
/**
* tipc_socket_init - initialize TIPC socket interface
*
diff --git a/net/tipc/socket.h b/net/tipc/socket.h
index 238f1b7..bf65513 100644
--- a/net/tipc/socket.h
+++ b/net/tipc/socket.h
@@ -44,10 +44,6 @@
SKB_TRUESIZE(TIPC_MAX_USER_MSG_SIZE))
int tipc_socket_init(void);
void tipc_socket_stop(void);
-int tipc_sock_create_local(struct net *net, int type, struct socket **res);
-void tipc_sock_release_local(struct socket *sock);
-int tipc_sock_accept_local(struct socket *sock, struct socket **newsock,
- int flags);
int tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq);
void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
struct sk_buff_head *inputq);
--
1.7.9.5
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists