[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <1268751635.3094.43.camel@edumazet-laptop>
Date: Tue, 16 Mar 2010 16:00:35 +0100
From: Eric Dumazet <eric.dumazet@...il.com>
To: Stephen Hemminger <shemminger@...tta.com>
Cc: David Miller <davem@...emloft.net>, netdev@...r.kernel.org
Subject: Re: [PATCH] netlink: convert DIY reader/writer to mutex and RCU
(v2)
Le lundi 08 mars 2010 à 14:00 -0800, Stephen Hemminger a écrit :
> The netlink table locking was open coded version of reader/writer
> sleeping lock. Change to using mutex and RCU which makes
> code clearer, shorter, and simpler.
>
> Could use sk_list nulls but then would have to have kmem_cache
> for netlink handles and that seems like unnecessary bloat.
>
> Signed-off-by: Stephen Hemminger <shemminger@...tta.com>
>
> ---
> v1 -> v2 do RCU correctly...
> * use spinlock not mutex (not safe to sleep in normal RCU)
> * use _rcu variants of add/delete
> kfree(nlk->groups);
> nlk->groups = NULL;
> @@ -533,6 +477,8 @@ static int netlink_release(struct socket
> local_bh_disable();
> sock_prot_inuse_add(sock_net(sk), &netlink_proto, -1);
> local_bh_enable();
> +
> + synchronize_rcu();
> sock_put(sk);
> return 0;
I am a bit scared by synchronize_rcu() proliferation. This can slow down
some workloads (some scripts invoking netlink commands, not using batch
mode)
We could change sk_prot_free() behavior and optionaly call a callback to
free a socket (and the module_put()) after rcu grace period. As the
rcu_head is not inside struct sock, I could not find a generic way to
code this.
For TCP/UDP sockets this was considered not a viable alternative, but
for other sockets its certainly better than synchronize_rcu() ?
diff --git a/include/net/sock.h b/include/net/sock.h
index 092b055..3a2d598 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -693,6 +693,8 @@ struct proto {
int (*backlog_rcv) (struct sock *sk,
struct sk_buff *skb);
+ void (*free_socket)(struct sock *sk);
+
/* Keeping track of sk's, looking them up, and port selection methods. */
void (*hash)(struct sock *sk);
void (*unhash)(struct sock *sk);
diff --git a/net/core/sock.c b/net/core/sock.c
index c5812bb..3060c9b 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1027,18 +1027,20 @@ out_free:
static void sk_prot_free(struct proto *prot, struct sock *sk)
{
- struct kmem_cache *slab;
- struct module *owner;
+ security_sk_free(sk);
+ if (prot->free_socket)
+ prot->free_socket(sk);
+ else {
+ struct kmem_cache *slab;
- owner = prot->owner;
- slab = prot->slab;
+ slab = prot->slab;
- security_sk_free(sk);
- if (slab != NULL)
- kmem_cache_free(slab, sk);
- else
- kfree(sk);
- module_put(owner);
+ if (slab != NULL)
+ kmem_cache_free(slab, sk);
+ else
+ kfree(sk);
+ module_put(prot->owner);
+ }
}
/**
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 320d042..fae2344 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -81,6 +81,7 @@ struct netlink_sock {
struct mutex cb_def_mutex;
void (*netlink_rcv)(struct sk_buff *skb);
struct module *module;
+ struct rcu_head rcu;
};
struct listeners_rcu_head {
@@ -394,10 +395,22 @@ static void netlink_remove(struct sock *sk)
netlink_table_ungrab();
}
+static void rcu_free_socket(struct rcu_head *rcu)
+{
+ kfree(container_of(rcu, struct netlink_sock, rcu));
+ module_put(THIS_MODULE);
+}
+
+static void free_socket(struct sock *sk)
+{
+ call_rcu(&nlk_sk(sk)->rcu, rcu_free_socket);
+}
+
static struct proto netlink_proto = {
.name = "NETLINK",
.owner = THIS_MODULE,
.obj_size = sizeof(struct netlink_sock),
+ .free_socket = free_socket,
};
static int __netlink_create(struct net *net, struct socket *sock,
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists