[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <4E366A8F.201@hippy.csoma.elte.hu>
Date: Mon, 01 Aug 2011 10:57:51 +0200
From: synapse <synapse@...py.csoma.elte.hu>
To: Eric Dumazet <eric.dumazet@...il.com>
CC: David Miller <davem@...emloft.net>, netdev@...r.kernel.org
Subject: Re: PROBLEM: BUG (NULL ptr dereference in ipv4_dst_check)
Hello
Sorry, I wasn't home on the weekend. Exactly to which tree should I
apply this?
It doesn't apply cleanly to 3.0.0. Am I missing something?
Gergely Kalman
On 07/30/11 07:00, Eric Dumazet wrote:
> Le vendredi 29 juillet 2011 à 17:43 +0200, Eric Dumazet a écrit :
>
>> Oh well, we already use RCU in neigh_destroy(), so adding rcu would need
>> to change all dst_get_neighbour() callers to be in one rcu_read_lock()
>> section.
>>
>> I'll take a look, I suspect its mostly already done.
>>
>>
> Here is the patch I finally cooked and tested.
>
> Could you please test it as well Gergely ?
>
> Thanks !
>
> [PATCH] net: fix NULL dereferences in check_peer_redir()
>
> Gergely Kalman reported crashes in check_peer_redir().
>
> It appears commit f39925dbde778 (ipv4: Cache learned redirect
> information in inetpeer.) added a race, leading to possible NULL ptr
> dereference.
>
> Since we can now change dst neighbour, we should make sure a reader can
> safely use a neighbour.
>
> Add RCU protection to dst neighbour, and make sure check_peer_redir()
> can be called safely by different cpus in parallel.
>
> As neighbours are already freed after one RCU grace period, this patch
> should not add typical RCU penalty (cache cold effects)
>
> Many thanks to Gergely for providing a pretty report pointing to the
> bug.
>
> Reported-by: Gergely Kalman<synapse@...py.csoma.elte.hu>
> Signed-off-by: Eric Dumazet<eric.dumazet@...il.com>
> ---
> include/net/dst.h | 17 +++++++++++++----
> net/ipv4/ip_output.c | 10 ++++++++--
> net/ipv4/route.c | 14 ++++++++------
> net/ipv6/addrconf.c | 2 +-
> net/ipv6/ip6_fib.c | 2 +-
> net/ipv6/ip6_output.c | 13 +++++++++++--
> net/ipv6/route.c | 35 +++++++++++++++++++++++++----------
> 7 files changed, 67 insertions(+), 26 deletions(-)
>
> diff --git a/include/net/dst.h b/include/net/dst.h
> index 29e2557..13d507d 100644
> --- a/include/net/dst.h
> +++ b/include/net/dst.h
> @@ -37,7 +37,7 @@ struct dst_entry {
> unsigned long _metrics;
> unsigned long expires;
> struct dst_entry *path;
> - struct neighbour *_neighbour;
> + struct neighbour __rcu *_neighbour;
> #ifdef CONFIG_XFRM
> struct xfrm_state *xfrm;
> #else
> @@ -88,12 +88,17 @@ struct dst_entry {
>
> static inline struct neighbour *dst_get_neighbour(struct dst_entry *dst)
> {
> - return dst->_neighbour;
> + return rcu_dereference(dst->_neighbour);
> +}
> +
> +static inline struct neighbour *dst_get_neighbour_raw(struct dst_entry *dst)
> +{
> + return rcu_dereference_raw(dst->_neighbour);
> }
>
> static inline void dst_set_neighbour(struct dst_entry *dst, struct neighbour *neigh)
> {
> - dst->_neighbour = neigh;
> + rcu_assign_pointer(dst->_neighbour, neigh);
> }
>
> extern u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old);
> @@ -382,8 +387,12 @@ static inline void dst_rcu_free(struct rcu_head *head)
> static inline void dst_confirm(struct dst_entry *dst)
> {
> if (dst) {
> - struct neighbour *n = dst_get_neighbour(dst);
> + struct neighbour *n;
> +
> + rcu_read_lock();
> + n = dst_get_neighbour(dst);
> neigh_confirm(n);
> + rcu_read_unlock();
> }
> }
>
> diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
> index ccaaa85..77d3ede 100644
> --- a/net/ipv4/ip_output.c
> +++ b/net/ipv4/ip_output.c
> @@ -204,9 +204,15 @@ static inline int ip_finish_output2(struct sk_buff *skb)
> skb = skb2;
> }
>
> + rcu_read_lock();
> neigh = dst_get_neighbour(dst);
> - if (neigh)
> - return neigh_output(neigh, skb);
> + if (neigh) {
> + int res = neigh_output(neigh, skb);
> +
> + rcu_read_unlock();
> + return res;
> + }
> + rcu_read_unlock();
>
> if (net_ratelimit())
> printk(KERN_DEBUG "ip_finish_output2: No header cache and no neighbour!\n");
> diff --git a/net/ipv4/route.c b/net/ipv4/route.c
> index 1730689..6afc4eb 100644
> --- a/net/ipv4/route.c
> +++ b/net/ipv4/route.c
> @@ -1628,16 +1628,18 @@ static int check_peer_redir(struct dst_entry *dst, struct inet_peer *peer)
> {
> struct rtable *rt = (struct rtable *) dst;
> __be32 orig_gw = rt->rt_gateway;
> - struct neighbour *n;
> + struct neighbour *n, *old_n;
>
> dst_confirm(&rt->dst);
>
> - neigh_release(dst_get_neighbour(&rt->dst));
> - dst_set_neighbour(&rt->dst, NULL);
> -
> rt->rt_gateway = peer->redirect_learned.a4;
> - rt_bind_neighbour(rt);
> - n = dst_get_neighbour(&rt->dst);
> +
> + n = ipv4_neigh_lookup(&rt->dst,&rt->rt_gateway);
> + if (IS_ERR(n))
> + return PTR_ERR(n);
> + old_n = xchg(&rt->dst._neighbour, n);
> + if (old_n)
> + neigh_release(old_n);
> if (!n || !(n->nud_state& NUD_VALID)) {
> if (n)
> neigh_event_send(n, NULL);
> diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
> index a55500c..f012ebd 100644
> --- a/net/ipv6/addrconf.c
> +++ b/net/ipv6/addrconf.c
> @@ -656,7 +656,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
> * layer address of our nexhop router
> */
>
> - if (dst_get_neighbour(&rt->dst) == NULL)
> + if (dst_get_neighbour_raw(&rt->dst) == NULL)
> ifa->flags&= ~IFA_F_OPTIMISTIC;
>
> ifa->idev = idev;
> diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
> index 54a4678..320d91d 100644
> --- a/net/ipv6/ip6_fib.c
> +++ b/net/ipv6/ip6_fib.c
> @@ -1455,7 +1455,7 @@ static int fib6_age(struct rt6_info *rt, void *arg)
> RT6_TRACE("aging clone %p\n", rt);
> return -1;
> } else if ((rt->rt6i_flags& RTF_GATEWAY)&&
> - (!(dst_get_neighbour(&rt->dst)->flags& NTF_ROUTER))) {
> + (!(dst_get_neighbour_raw(&rt->dst)->flags& NTF_ROUTER))) {
> RT6_TRACE("purging route %p via non-router but gateway\n",
> rt);
> return -1;
> diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
> index 32e5339..4c882cf 100644
> --- a/net/ipv6/ip6_output.c
> +++ b/net/ipv6/ip6_output.c
> @@ -135,10 +135,15 @@ static int ip6_finish_output2(struct sk_buff *skb)
> skb->len);
> }
>
> + rcu_read_lock();
> neigh = dst_get_neighbour(dst);
> - if (neigh)
> - return neigh_output(neigh, skb);
> + if (neigh) {
> + int res = neigh_output(neigh, skb);
>
> + rcu_read_unlock();
> + return res;
> + }
> + rcu_read_unlock();
> IP6_INC_STATS_BH(dev_net(dst->dev),
> ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
> kfree_skb(skb);
> @@ -975,12 +980,14 @@ static int ip6_dst_lookup_tail(struct sock *sk,
> * dst entry and replace it instead with the
> * dst entry of the nexthop router
> */
> + rcu_read_lock();
> n = dst_get_neighbour(*dst);
> if (n&& !(n->nud_state& NUD_VALID)) {
> struct inet6_ifaddr *ifp;
> struct flowi6 fl_gw6;
> int redirect;
>
> + rcu_read_unlock();
> ifp = ipv6_get_ifaddr(net,&fl6->saddr,
> (*dst)->dev, 1);
>
> @@ -1000,6 +1007,8 @@ static int ip6_dst_lookup_tail(struct sock *sk,
> if ((err = (*dst)->error))
> goto out_err_release;
> }
> + } else {
> + rcu_read_unlock();
> }
> #endif
>
> diff --git a/net/ipv6/route.c b/net/ipv6/route.c
> index e8987da..9e69eb0 100644
> --- a/net/ipv6/route.c
> +++ b/net/ipv6/route.c
> @@ -364,7 +364,7 @@ out:
> #ifdef CONFIG_IPV6_ROUTER_PREF
> static void rt6_probe(struct rt6_info *rt)
> {
> - struct neighbour *neigh = rt ? dst_get_neighbour(&rt->dst) : NULL;
> + struct neighbour *neigh;
> /*
> * Okay, this does not seem to be appropriate
> * for now, however, we need to check if it
> @@ -373,8 +373,10 @@ static void rt6_probe(struct rt6_info *rt)
> * Router Reachability Probe MUST be rate-limited
> * to no more than one per minute.
> */
> + rcu_read_lock();
> + neigh = rt ? dst_get_neighbour(&rt->dst) : NULL;
> if (!neigh || (neigh->nud_state& NUD_VALID))
> - return;
> + goto out;
> read_lock_bh(&neigh->lock);
> if (!(neigh->nud_state& NUD_VALID)&&
> time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
> @@ -387,8 +389,11 @@ static void rt6_probe(struct rt6_info *rt)
> target = (struct in6_addr *)&neigh->primary_key;
> addrconf_addr_solict_mult(target,&mcaddr);
> ndisc_send_ns(rt->rt6i_dev, NULL, target,&mcaddr, NULL);
> - } else
> + } else {
> read_unlock_bh(&neigh->lock);
> + }
> +out:
> + rcu_read_unlock();
> }
> #else
> static inline void rt6_probe(struct rt6_info *rt)
> @@ -412,8 +417,11 @@ static inline int rt6_check_dev(struct rt6_info *rt, int oif)
>
> static inline int rt6_check_neigh(struct rt6_info *rt)
> {
> - struct neighbour *neigh = dst_get_neighbour(&rt->dst);
> + struct neighbour *neigh;
> int m;
> +
> + rcu_read_lock();
> + neigh = dst_get_neighbour(&rt->dst);
> if (rt->rt6i_flags& RTF_NONEXTHOP ||
> !(rt->rt6i_flags& RTF_GATEWAY))
> m = 1;
> @@ -430,6 +438,7 @@ static inline int rt6_check_neigh(struct rt6_info *rt)
> read_unlock_bh(&neigh->lock);
> } else
> m = 0;
> + rcu_read_unlock();
> return m;
> }
>
> @@ -769,7 +778,7 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
> rt->rt6i_dst.plen = 128;
> rt->rt6i_flags |= RTF_CACHE;
> rt->dst.flags |= DST_HOST;
> - dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour(&ort->dst)));
> + dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_raw(&ort->dst)));
> }
> return rt;
> }
> @@ -803,7 +812,7 @@ restart:
> dst_hold(&rt->dst);
> read_unlock_bh(&table->tb6_lock);
>
> - if (!dst_get_neighbour(&rt->dst)&& !(rt->rt6i_flags& RTF_NONEXTHOP))
> + if (!dst_get_neighbour_raw(&rt->dst)&& !(rt->rt6i_flags& RTF_NONEXTHOP))
> nrt = rt6_alloc_cow(rt,&fl6->daddr,&fl6->saddr);
> else if (!(rt->dst.flags& DST_HOST))
> nrt = rt6_alloc_clone(rt,&fl6->daddr);
> @@ -1587,7 +1596,7 @@ void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
> dst_confirm(&rt->dst);
>
> /* Duplicate redirect: silently ignore. */
> - if (neigh == dst_get_neighbour(&rt->dst))
> + if (neigh == dst_get_neighbour_raw(&rt->dst))
> goto out;
>
> nrt = ip6_rt_copy(rt, dest);
> @@ -1682,7 +1691,7 @@ again:
> 1. It is connected route. Action: COW
> 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
> */
> - if (!dst_get_neighbour(&rt->dst)&& !(rt->rt6i_flags& RTF_NONEXTHOP))
> + if (!dst_get_neighbour_raw(&rt->dst)&& !(rt->rt6i_flags& RTF_NONEXTHOP))
> nrt = rt6_alloc_cow(rt, daddr, saddr);
> else
> nrt = rt6_alloc_clone(rt, daddr);
> @@ -2326,6 +2335,7 @@ static int rt6_fill_node(struct net *net,
> struct nlmsghdr *nlh;
> long expires;
> u32 table;
> + struct neighbour *n;
>
> if (prefix) { /* user wants prefix routes only */
> if (!(rt->rt6i_flags& RTF_PREFIX_RT)) {
> @@ -2414,8 +2424,11 @@ static int rt6_fill_node(struct net *net,
> if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst))< 0)
> goto nla_put_failure;
>
> - if (dst_get_neighbour(&rt->dst))
> - NLA_PUT(skb, RTA_GATEWAY, 16,&dst_get_neighbour(&rt->dst)->primary_key);
> + rcu_read_lock();
> + n = dst_get_neighbour(&rt->dst);
> + if (n)
> + NLA_PUT(skb, RTA_GATEWAY, 16,&n->primary_key);
> + rcu_read_unlock();
>
> if (rt->dst.dev)
> NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
> @@ -2608,12 +2621,14 @@ static int rt6_info_route(struct rt6_info *rt, void *p_arg)
> #else
> seq_puts(m, "00000000000000000000000000000000 00 ");
> #endif
> + rcu_read_lock();
> n = dst_get_neighbour(&rt->dst);
> if (n) {
> seq_printf(m, "%pi6", n->primary_key);
> } else {
> seq_puts(m, "00000000000000000000000000000000");
> }
> + rcu_read_unlock();
> seq_printf(m, " %08x %08x %08x %08x %8s\n",
> rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
> rt->dst.__use, rt->rt6i_flags,
>
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists