lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Thu, 14 Oct 2010 05:57:11 +0200
From:	Eric Dumazet <eric.dumazet@...il.com>
To:	David Miller <davem@...emloft.net>
Cc:	daniel.lezcano@...e.fr, ebiederm@...ssion.com,
	hans.schillstrom@...csson.com, netdev@...r.kernel.org
Subject: Re: BUG ? ipip unregister_netdevice_many()

Le mercredi 13 octobre 2010 à 16:23 -0700, David Miller a écrit :
> From: Daniel Lezcano <daniel.lezcano@...e.fr>
> Date: Thu, 14 Oct 2010 00:16:15 +0200
> 
> > do you mind to wait I test the patch before merging it ?
> > I would like to stress a bit this routine with multiple containers.
> 
> Yes, it would be great if you could test this.
> 
> Please make sure you get the fix for the bug that
> Jarek found ('list' needs to be initialized to NULL)
> 
> I've included the latest version below:
> 
> diff --git a/include/net/route.h b/include/net/route.h
> index 7e5e73b..8d24761 100644
> --- a/include/net/route.h
> +++ b/include/net/route.h
> @@ -106,7 +106,7 @@ extern int		ip_rt_init(void);
>  extern void		ip_rt_redirect(__be32 old_gw, __be32 dst, __be32 new_gw,
>  				       __be32 src, struct net_device *dev);
>  extern void		rt_cache_flush(struct net *net, int how);
> -extern void		rt_cache_flush_batch(void);
> +extern void		rt_cache_flush_batch(struct net *net);
>  extern int		__ip_route_output_key(struct net *, struct rtable **, const struct flowi *flp);
>  extern int		ip_route_output_key(struct net *, struct rtable **, struct flowi *flp);
>  extern int		ip_route_output_flow(struct net *, struct rtable **rp, struct flowi *flp, struct sock *sk, int flags);
> diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
> index 919f2ad..4039f56 100644
> --- a/net/ipv4/fib_frontend.c
> +++ b/net/ipv4/fib_frontend.c
> @@ -999,7 +999,7 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
>  		rt_cache_flush(dev_net(dev), 0);
>  		break;
>  	case NETDEV_UNREGISTER_BATCH:
> -		rt_cache_flush_batch();
> +		rt_cache_flush_batch(dev_net(dev));
>  		break;
>  	}
>  	return NOTIFY_DONE;
> diff --git a/net/ipv4/route.c b/net/ipv4/route.c
> index 0755aa4..6ad730c 100644
> --- a/net/ipv4/route.c
> +++ b/net/ipv4/route.c
> @@ -712,13 +712,14 @@ static inline int rt_is_expired(struct rtable *rth)
>   * Can be called by a softirq or a process.
>   * In the later case, we want to be reschedule if necessary
>   */
> -static void rt_do_flush(int process_context)
> +static void rt_do_flush(struct net *net, int process_context)
>  {
>  	unsigned int i;
>  	struct rtable *rth, *next;
> -	struct rtable * tail;
>  
>  	for (i = 0; i <= rt_hash_mask; i++) {
> +		struct rtable *list = NULL, **pprev;
> +
>  		if (process_context && need_resched())
>  			cond_resched();
>  		rth = rt_hash_table[i].chain;
> @@ -726,41 +727,27 @@ static void rt_do_flush(int process_context)
>  			continue;
>  
>  		spin_lock_bh(rt_hash_lock_addr(i));
> -#ifdef CONFIG_NET_NS
> -		{
> -		struct rtable ** prev, * p;
>  
> -		rth = rt_hash_table[i].chain;
> +		pprev = &rt_hash_table[i].chain;
> +		rth = *pprev;
> +		while (rth) {
> +			next = rth->dst.rt_next;
> +			if (dev_net(rth->dst.dev) == net) {

		if (net_eq(dev_net(rth->dst.dev), net)) {


> +				*pprev = next;
>  
> -		/* defer releasing the head of the list after spin_unlock */
> -		for (tail = rth; tail; tail = tail->dst.rt_next)
> -			if (!rt_is_expired(tail))
> -				break;
> -		if (rth != tail)
> -			rt_hash_table[i].chain = tail;
> -
> -		/* call rt_free on entries after the tail requiring flush */
> -		prev = &rt_hash_table[i].chain;
> -		for (p = *prev; p; p = next) {
> -			next = p->dst.rt_next;
> -			if (!rt_is_expired(p)) {
> -				prev = &p->dst.rt_next;
> -			} else {
> -				*prev = next;
> -				rt_free(p);
> -			}
> -		}
> +				rth->dst.rt_next = list;
> +				list = rth;

I was wondering about RCU rules here.
We change pointers while a reader might enter in a loop.
It seems fine: as soon as we spin_unlock(), the loop should be closed.

Acked-by: Eric Dumazet <eric.dumazet@...il.com>

Minor coding style point: you should add braces around the else clause:

pprev = &rt_hash_table[i].chain;
for (rth = *pprev; rth != NULL; rth = next) {
	next = rth->dst.rt_next;
	if (net_eq(dev_net(rth->dst.dev), net)) {
		*pprev = next;
		rth->dst.rt_next = list;
		list = rth;
	} else {
		pprev = &rth->dst.rt_next;
	}
}

Thanks !


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ