lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <4E366A8F.201@hippy.csoma.elte.hu>
Date:	Mon, 01 Aug 2011 10:57:51 +0200
From:	synapse <synapse@...py.csoma.elte.hu>
To:	Eric Dumazet <eric.dumazet@...il.com>
CC:	David Miller <davem@...emloft.net>, netdev@...r.kernel.org
Subject: Re: PROBLEM: BUG (NULL ptr dereference in ipv4_dst_check)

Hello

Sorry, I wasn't home on the weekend. Exactly to which tree should I 
apply this?
It doesn't apply cleanly to 3.0.0. Am I missing something?

Gergely Kalman


On 07/30/11 07:00, Eric Dumazet wrote:
> Le vendredi 29 juillet 2011 à 17:43 +0200, Eric Dumazet a écrit :
>
>> Oh well, we already use RCU in neigh_destroy(), so adding rcu would need
>> to change all dst_get_neighbour() callers to be in one rcu_read_lock()
>> section.
>>
>> I'll take a look, I suspect its mostly already done.
>>
>>
> Here is the patch I finally cooked and tested.
>
> Could you please test it as well Gergely ?
>
> Thanks !
>
> [PATCH] net: fix NULL dereferences in check_peer_redir()
>
> Gergely Kalman reported crashes in check_peer_redir().
>
> It appears commit f39925dbde778 (ipv4: Cache learned redirect
> information in inetpeer.) added a race, leading to possible NULL ptr
> dereference.
>
> Since we can now change dst neighbour, we should make sure a reader can
> safely use a neighbour.
>
> Add RCU protection to dst neighbour, and make sure check_peer_redir()
> can be called safely by different cpus in parallel.
>
> As neighbours are already freed after one RCU grace period, this patch
> should not add typical RCU penalty (cache cold effects)
>
> Many thanks to Gergely for providing a pretty report pointing to the
> bug.
>
> Reported-by: Gergely Kalman<synapse@...py.csoma.elte.hu>
> Signed-off-by: Eric Dumazet<eric.dumazet@...il.com>
> ---
>   include/net/dst.h     |   17 +++++++++++++----
>   net/ipv4/ip_output.c  |   10 ++++++++--
>   net/ipv4/route.c      |   14 ++++++++------
>   net/ipv6/addrconf.c   |    2 +-
>   net/ipv6/ip6_fib.c    |    2 +-
>   net/ipv6/ip6_output.c |   13 +++++++++++--
>   net/ipv6/route.c      |   35 +++++++++++++++++++++++++----------
>   7 files changed, 67 insertions(+), 26 deletions(-)
>
> diff --git a/include/net/dst.h b/include/net/dst.h
> index 29e2557..13d507d 100644
> --- a/include/net/dst.h
> +++ b/include/net/dst.h
> @@ -37,7 +37,7 @@ struct dst_entry {
>   	unsigned long		_metrics;
>   	unsigned long		expires;
>   	struct dst_entry	*path;
> -	struct neighbour	*_neighbour;
> +	struct neighbour __rcu	*_neighbour;
>   #ifdef CONFIG_XFRM
>   	struct xfrm_state	*xfrm;
>   #else
> @@ -88,12 +88,17 @@ struct dst_entry {
>
>   static inline struct neighbour *dst_get_neighbour(struct dst_entry *dst)
>   {
> -	return dst->_neighbour;
> +	return rcu_dereference(dst->_neighbour);
> +}
> +
> +static inline struct neighbour *dst_get_neighbour_raw(struct dst_entry *dst)
> +{
> +	return rcu_dereference_raw(dst->_neighbour);
>   }
>
>   static inline void dst_set_neighbour(struct dst_entry *dst, struct neighbour *neigh)
>   {
> -	dst->_neighbour = neigh;
> +	rcu_assign_pointer(dst->_neighbour, neigh);
>   }
>
>   extern u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old);
> @@ -382,8 +387,12 @@ static inline void dst_rcu_free(struct rcu_head *head)
>   static inline void dst_confirm(struct dst_entry *dst)
>   {
>   	if (dst) {
> -		struct neighbour *n = dst_get_neighbour(dst);
> +		struct neighbour *n;
> +
> +		rcu_read_lock();
> +		n = dst_get_neighbour(dst);
>   		neigh_confirm(n);
> +		rcu_read_unlock();
>   	}
>   }
>
> diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
> index ccaaa85..77d3ede 100644
> --- a/net/ipv4/ip_output.c
> +++ b/net/ipv4/ip_output.c
> @@ -204,9 +204,15 @@ static inline int ip_finish_output2(struct sk_buff *skb)
>   		skb = skb2;
>   	}
>
> +	rcu_read_lock();
>   	neigh = dst_get_neighbour(dst);
> -	if (neigh)
> -		return neigh_output(neigh, skb);
> +	if (neigh) {
> +		int res = neigh_output(neigh, skb);
> +
> +		rcu_read_unlock();
> +		return res;
> +	}
> +	rcu_read_unlock();
>
>   	if (net_ratelimit())
>   		printk(KERN_DEBUG "ip_finish_output2: No header cache and no neighbour!\n");
> diff --git a/net/ipv4/route.c b/net/ipv4/route.c
> index 1730689..6afc4eb 100644
> --- a/net/ipv4/route.c
> +++ b/net/ipv4/route.c
> @@ -1628,16 +1628,18 @@ static int check_peer_redir(struct dst_entry *dst, struct inet_peer *peer)
>   {
>   	struct rtable *rt = (struct rtable *) dst;
>   	__be32 orig_gw = rt->rt_gateway;
> -	struct neighbour *n;
> +	struct neighbour *n, *old_n;
>
>   	dst_confirm(&rt->dst);
>
> -	neigh_release(dst_get_neighbour(&rt->dst));
> -	dst_set_neighbour(&rt->dst, NULL);
> -
>   	rt->rt_gateway = peer->redirect_learned.a4;
> -	rt_bind_neighbour(rt);
> -	n = dst_get_neighbour(&rt->dst);
> +
> +	n = ipv4_neigh_lookup(&rt->dst,&rt->rt_gateway);
> +	if (IS_ERR(n))
> +		return PTR_ERR(n);
> +	old_n = xchg(&rt->dst._neighbour, n);
> +	if (old_n)
> +		neigh_release(old_n);
>   	if (!n || !(n->nud_state&  NUD_VALID)) {
>   		if (n)
>   			neigh_event_send(n, NULL);
> diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
> index a55500c..f012ebd 100644
> --- a/net/ipv6/addrconf.c
> +++ b/net/ipv6/addrconf.c
> @@ -656,7 +656,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
>   	 * layer address of our nexhop router
>   	 */
>
> -	if (dst_get_neighbour(&rt->dst) == NULL)
> +	if (dst_get_neighbour_raw(&rt->dst) == NULL)
>   		ifa->flags&= ~IFA_F_OPTIMISTIC;
>
>   	ifa->idev = idev;
> diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
> index 54a4678..320d91d 100644
> --- a/net/ipv6/ip6_fib.c
> +++ b/net/ipv6/ip6_fib.c
> @@ -1455,7 +1455,7 @@ static int fib6_age(struct rt6_info *rt, void *arg)
>   			RT6_TRACE("aging clone %p\n", rt);
>   			return -1;
>   		} else if ((rt->rt6i_flags&  RTF_GATEWAY)&&
> -			   (!(dst_get_neighbour(&rt->dst)->flags&  NTF_ROUTER))) {
> +			   (!(dst_get_neighbour_raw(&rt->dst)->flags&  NTF_ROUTER))) {
>   			RT6_TRACE("purging route %p via non-router but gateway\n",
>   				  rt);
>   			return -1;
> diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
> index 32e5339..4c882cf 100644
> --- a/net/ipv6/ip6_output.c
> +++ b/net/ipv6/ip6_output.c
> @@ -135,10 +135,15 @@ static int ip6_finish_output2(struct sk_buff *skb)
>   				skb->len);
>   	}
>
> +	rcu_read_lock();
>   	neigh = dst_get_neighbour(dst);
> -	if (neigh)
> -		return neigh_output(neigh, skb);
> +	if (neigh) {
> +		int res = neigh_output(neigh, skb);
>
> +		rcu_read_unlock();
> +		return res;
> +	}
> +	rcu_read_unlock();
>   	IP6_INC_STATS_BH(dev_net(dst->dev),
>   			 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
>   	kfree_skb(skb);
> @@ -975,12 +980,14 @@ static int ip6_dst_lookup_tail(struct sock *sk,
>   	 * dst entry and replace it instead with the
>   	 * dst entry of the nexthop router
>   	 */
> +	rcu_read_lock();
>   	n = dst_get_neighbour(*dst);
>   	if (n&&  !(n->nud_state&  NUD_VALID)) {
>   		struct inet6_ifaddr *ifp;
>   		struct flowi6 fl_gw6;
>   		int redirect;
>
> +		rcu_read_unlock();
>   		ifp = ipv6_get_ifaddr(net,&fl6->saddr,
>   				      (*dst)->dev, 1);
>
> @@ -1000,6 +1007,8 @@ static int ip6_dst_lookup_tail(struct sock *sk,
>   			if ((err = (*dst)->error))
>   				goto out_err_release;
>   		}
> +	} else {
> +		rcu_read_unlock();
>   	}
>   #endif
>
> diff --git a/net/ipv6/route.c b/net/ipv6/route.c
> index e8987da..9e69eb0 100644
> --- a/net/ipv6/route.c
> +++ b/net/ipv6/route.c
> @@ -364,7 +364,7 @@ out:
>   #ifdef CONFIG_IPV6_ROUTER_PREF
>   static void rt6_probe(struct rt6_info *rt)
>   {
> -	struct neighbour *neigh = rt ? dst_get_neighbour(&rt->dst) : NULL;
> +	struct neighbour *neigh;
>   	/*
>   	 * Okay, this does not seem to be appropriate
>   	 * for now, however, we need to check if it
> @@ -373,8 +373,10 @@ static void rt6_probe(struct rt6_info *rt)
>   	 * Router Reachability Probe MUST be rate-limited
>   	 * to no more than one per minute.
>   	 */
> +	rcu_read_lock();
> +	neigh = rt ? dst_get_neighbour(&rt->dst) : NULL;
>   	if (!neigh || (neigh->nud_state&  NUD_VALID))
> -		return;
> +		goto out;
>   	read_lock_bh(&neigh->lock);
>   	if (!(neigh->nud_state&  NUD_VALID)&&
>   	time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
> @@ -387,8 +389,11 @@ static void rt6_probe(struct rt6_info *rt)
>   		target = (struct in6_addr *)&neigh->primary_key;
>   		addrconf_addr_solict_mult(target,&mcaddr);
>   		ndisc_send_ns(rt->rt6i_dev, NULL, target,&mcaddr, NULL);
> -	} else
> +	} else {
>   		read_unlock_bh(&neigh->lock);
> +	}
> +out:
> +	rcu_read_unlock();
>   }
>   #else
>   static inline void rt6_probe(struct rt6_info *rt)
> @@ -412,8 +417,11 @@ static inline int rt6_check_dev(struct rt6_info *rt, int oif)
>
>   static inline int rt6_check_neigh(struct rt6_info *rt)
>   {
> -	struct neighbour *neigh = dst_get_neighbour(&rt->dst);
> +	struct neighbour *neigh;
>   	int m;
> +
> +	rcu_read_lock();
> +	neigh = dst_get_neighbour(&rt->dst);
>   	if (rt->rt6i_flags&  RTF_NONEXTHOP ||
>   	    !(rt->rt6i_flags&  RTF_GATEWAY))
>   		m = 1;
> @@ -430,6 +438,7 @@ static inline int rt6_check_neigh(struct rt6_info *rt)
>   		read_unlock_bh(&neigh->lock);
>   	} else
>   		m = 0;
> +	rcu_read_unlock();
>   	return m;
>   }
>
> @@ -769,7 +778,7 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
>   		rt->rt6i_dst.plen = 128;
>   		rt->rt6i_flags |= RTF_CACHE;
>   		rt->dst.flags |= DST_HOST;
> -		dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour(&ort->dst)));
> +		dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_raw(&ort->dst)));
>   	}
>   	return rt;
>   }
> @@ -803,7 +812,7 @@ restart:
>   	dst_hold(&rt->dst);
>   	read_unlock_bh(&table->tb6_lock);
>
> -	if (!dst_get_neighbour(&rt->dst)&&  !(rt->rt6i_flags&  RTF_NONEXTHOP))
> +	if (!dst_get_neighbour_raw(&rt->dst)&&  !(rt->rt6i_flags&  RTF_NONEXTHOP))
>   		nrt = rt6_alloc_cow(rt,&fl6->daddr,&fl6->saddr);
>   	else if (!(rt->dst.flags&  DST_HOST))
>   		nrt = rt6_alloc_clone(rt,&fl6->daddr);
> @@ -1587,7 +1596,7 @@ void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
>   	dst_confirm(&rt->dst);
>
>   	/* Duplicate redirect: silently ignore. */
> -	if (neigh == dst_get_neighbour(&rt->dst))
> +	if (neigh == dst_get_neighbour_raw(&rt->dst))
>   		goto out;
>
>   	nrt = ip6_rt_copy(rt, dest);
> @@ -1682,7 +1691,7 @@ again:
>   	   1. It is connected route. Action: COW
>   	   2. It is gatewayed route or NONEXTHOP route. Action: clone it.
>   	 */
> -	if (!dst_get_neighbour(&rt->dst)&&  !(rt->rt6i_flags&  RTF_NONEXTHOP))
> +	if (!dst_get_neighbour_raw(&rt->dst)&&  !(rt->rt6i_flags&  RTF_NONEXTHOP))
>   		nrt = rt6_alloc_cow(rt, daddr, saddr);
>   	else
>   		nrt = rt6_alloc_clone(rt, daddr);
> @@ -2326,6 +2335,7 @@ static int rt6_fill_node(struct net *net,
>   	struct nlmsghdr *nlh;
>   	long expires;
>   	u32 table;
> +	struct neighbour *n;
>
>   	if (prefix) {	/* user wants prefix routes only */
>   		if (!(rt->rt6i_flags&  RTF_PREFIX_RT)) {
> @@ -2414,8 +2424,11 @@ static int rt6_fill_node(struct net *net,
>   	if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst))<  0)
>   		goto nla_put_failure;
>
> -	if (dst_get_neighbour(&rt->dst))
> -		NLA_PUT(skb, RTA_GATEWAY, 16,&dst_get_neighbour(&rt->dst)->primary_key);
> +	rcu_read_lock();
> +	n = dst_get_neighbour(&rt->dst);
> +	if (n)
> +		NLA_PUT(skb, RTA_GATEWAY, 16,&n->primary_key);
> +	rcu_read_unlock();
>
>   	if (rt->dst.dev)
>   		NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
> @@ -2608,12 +2621,14 @@ static int rt6_info_route(struct rt6_info *rt, void *p_arg)
>   #else
>   	seq_puts(m, "00000000000000000000000000000000 00 ");
>   #endif
> +	rcu_read_lock();
>   	n = dst_get_neighbour(&rt->dst);
>   	if (n) {
>   		seq_printf(m, "%pi6", n->primary_key);
>   	} else {
>   		seq_puts(m, "00000000000000000000000000000000");
>   	}
> +	rcu_read_unlock();
>   	seq_printf(m, " %08x %08x %08x %08x %8s\n",
>   		   rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
>   		   rt->dst.__use, rt->rt6i_flags,
>

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ