[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <1403629511.3796.35.camel@edumazet-glaptop2.roam.corp.google.com>
Date: Tue, 24 Jun 2014 10:05:11 -0700
From: Eric Dumazet <eric.dumazet@...il.com>
To: dormando <dormando@...ia.net>, David Miller <davem@...emloft.net>
Cc: Alexey Preobrazhensky <preobr@...gle.com>,
Steffen Klassert <steffen.klassert@...unet.com>,
paulmck@...ux.vnet.ibm.com, netdev@...r.kernel.org,
Kostya Serebryany <kcc@...gle.com>,
Dmitry Vyukov <dvyukov@...gle.com>,
Lars Bull <larsbull@...gle.com>,
Eric Dumazet <edumazet@...gle.com>,
Bruce Curtis <brutus@...gle.com>,
Maciej Żenczykowski <maze@...gle.com>,
Alexei Starovoitov <alexei.starovoitov@...il.com>
Subject: [PATCH net] ipv4: fix dst race in sk_dst_get()
From: Eric Dumazet <edumazet@...gle.com>
When IP route cache had been removed in linux-3.6, we broke assumption
that dst entries were all freed after rcu grace period. DST_NOCACHE
dst were supposed to be freed from dst_release(). But it appears
we want to keep such dst around, either in UDP sockets or tunnels.
In sk_dst_get() we need to make sure dst refcount is not 0
before incrementing it, or else we might end up freeing a dst
twice.
DST_NOCACHE set on a dst does not mean this dst can not be attached
to a socket or a tunnel.
Then, before actual freeing, we need to observe a rcu grace period
to make sure all other cpus can catch the fact the dst is no longer
usable.
Signed-off-by: Eric Dumazet <edumazet@...gle.com>
Reported-by: Dormando <dormando@...ia.net>
---
include/net/sock.h | 4 ++--
net/core/dst.c | 16 +++++++++++-----
net/ipv4/ip_tunnel.c | 14 +++++---------
3 files changed, 18 insertions(+), 16 deletions(-)
diff --git a/include/net/sock.h b/include/net/sock.h
index 07b7fcd60d80..173cae485de1 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1730,8 +1730,8 @@ sk_dst_get(struct sock *sk)
rcu_read_lock();
dst = rcu_dereference(sk->sk_dst_cache);
- if (dst)
- dst_hold(dst);
+ if (dst && !atomic_inc_not_zero(&dst->__refcnt))
+ dst = NULL;
rcu_read_unlock();
return dst;
}
diff --git a/net/core/dst.c b/net/core/dst.c
index 80d6286c8b62..a028409ee438 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -269,6 +269,15 @@ again:
}
EXPORT_SYMBOL(dst_destroy);
+static void dst_destroy_rcu(struct rcu_head *head)
+{
+ struct dst_entry *dst = container_of(head, struct dst_entry, rcu_head);
+
+ dst = dst_destroy(dst);
+ if (dst)
+ __dst_free(dst);
+}
+
void dst_release(struct dst_entry *dst)
{
if (dst) {
@@ -276,11 +285,8 @@ void dst_release(struct dst_entry *dst)
newrefcnt = atomic_dec_return(&dst->__refcnt);
WARN_ON(newrefcnt < 0);
- if (unlikely(dst->flags & DST_NOCACHE) && !newrefcnt) {
- dst = dst_destroy(dst);
- if (dst)
- __dst_free(dst);
- }
+ if (unlikely(dst->flags & DST_NOCACHE) && !newrefcnt)
+ call_rcu(&dst->rcu_head, dst_destroy_rcu);
}
}
EXPORT_SYMBOL(dst_release);
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 097b3e7c1e8f..54b6731dab55 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -73,12 +73,7 @@ static void __tunnel_dst_set(struct ip_tunnel_dst *idst,
{
struct dst_entry *old_dst;
- if (dst) {
- if (dst->flags & DST_NOCACHE)
- dst = NULL;
- else
- dst_clone(dst);
- }
+ dst_clone(dst);
old_dst = xchg((__force struct dst_entry **)&idst->dst, dst);
dst_release(old_dst);
}
@@ -108,13 +103,14 @@ static struct rtable *tunnel_rtable_get(struct ip_tunnel *t, u32 cookie)
rcu_read_lock();
dst = rcu_dereference(this_cpu_ptr(t->dst_cache)->dst);
+ if (dst && !atomic_inc_not_zero(&dst->__refcnt))
+ dst = NULL;
if (dst) {
if (dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
- rcu_read_unlock();
tunnel_dst_reset(t);
- return NULL;
+ dst_release(dst);
+ dst = NULL;
}
- dst_hold(dst);
}
rcu_read_unlock();
return (struct rtable *)dst;
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists