[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20171107092458.GF3857@worktop>
Date:   Tue, 7 Nov 2017 10:24:58 +0100
From:   Peter Zijlstra <peterz@...radead.org>
To:     Florian Westphal <fw@...len.de>
Cc:     netdev@...r.kernel.org
Subject: Re: [PATCH net-next 2/8] rtnetlink: add rtnl_register_module
On Tue, Nov 07, 2017 at 10:10:04AM +0100, Peter Zijlstra wrote:
> On Tue, Nov 07, 2017 at 07:11:56AM +0100, Florian Westphal wrote:
> > Peter Zijlstra <peterz@...radead.org> wrote:
> > > On Mon, Nov 06, 2017 at 11:51:07AM +0100, Florian Westphal wrote:
> > > > @@ -180,6 +164,12 @@ int __rtnl_register(int protocol, int msgtype,
> > > >  		rcu_assign_pointer(rtnl_msg_handlers[protocol], tab);
> > > >  	}
> > > >  
> > > > +	WARN_ON(tab[msgindex].owner && tab[msgindex].owner != owner);
> > > > +
> > > > +	tab[msgindex].owner = owner;
> > > > +	/* make sure owner is always visible first */
> > > > +	smp_wmb();
> > > > +
> > > >  	if (doit)
> > > >  		tab[msgindex].doit = doit;
> > > >  	if (dumpit)
> > > 
> > > > @@ -235,6 +279,9 @@ int rtnl_unregister(int protocol, int msgtype)
> > > >  	handlers[msgindex].doit = NULL;
> > > >  	handlers[msgindex].dumpit = NULL;
> > > >  	handlers[msgindex].flags = 0;
> > > > +	/* make sure we clear owner last */
> > > > +	smp_wmb();
> > > > +	handlers[msgindex].owner = NULL;
> > > >  	rtnl_unlock();
> > > >  
> > > >  	return 0;
> > > 
> > > These wmb()'s don't make sense; and the comments are incomplete. What do
> > > they pair with? Who cares about this ordering?
> > 
> > rtnetlink_rcv_msg:
> > 
> > 4406                         dumpit = READ_ONCE(handlers[type].dumpit);
> > 4407                         if (!dumpit)
> > 4408                                 goto err_unlock;
> > 4409                         owner = READ_ONCE(handlers[type].owner);
> 
> So what stops the CPU from hoisting this load before the dumpit load?
> 
> > 4410                 }
> > ..
> > 4417                 if (!try_module_get(owner))
> > 4418                         err = -EPROTONOSUPPORT;
> > 4419 
> > 
> > I don't want dumpit function address to be visible before owner.
> > Does that make sense?
> 
> And no. That's insane, how can it ever observe an incomplete tab in the
> first place.
> 
> The problem is that __rtnl_register() and rtnl_unregister are broken.
> 
> __rtnl_register() publishes the tab before it initializes it; allowing
> people to observe the thing incomplete.
> 
> Also, are we required to hold rtnl_lock() across __rtnl_register()? I'd
> hope so, otherwise what stops concurrent allocations and leaking of tab?
> 
> Also, rtnl_register() doesn't seem to employ rtnl_lock() and panic()
> WTF?!
> 
> rtnl_unregister() should then RCU free the tab.
> 
> None of that is happening, so what is that RCU stuff supposed to do?
Something like the below would go some way toward sanitizing this stuff;
rcu_assign_pointer() is a store-release, meaning it happens after
everything coming before.
Therefore, when you observe that tab (through rcu_dereference) you're
guaranteed to see the thing initialized. The memory ordering on the
consume side is through an address dependency; we need to have completed
the load of the tab pointer before we can compute the address of its
members and load from there, these are not things a CPU is allowed to
reorder (let's forget about Alpha).
Quite possibly, if rtnl_unregister() is called from module unload, this
is broken; in that case we'd need something like:
	rcu_assign_pointer(rtnl_msg_handler[protocol], NULL);
	/*
	 * Ensure nobody can still observe our old protocol handler
	 * before continuing to free the module that includes the
	 * functions called from it.
	 */
	synchronize_rcu();
---
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 5ace48926b19..25391c7b9c5d 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -63,6 +63,7 @@ struct rtnl_link {
 	rtnl_doit_func		doit;
 	rtnl_dumpit_func	dumpit;
 	unsigned int		flags;
+	struct rcu_head		rcu;
 };
 
 static DEFINE_MUTEX(rtnl_mutex);
@@ -172,14 +173,15 @@ int __rtnl_register(int protocol, int msgtype,
 	BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX);
 	msgindex = rtm_msgindex(msgtype);
 
-	tab = rcu_dereference_raw(rtnl_msg_handlers[protocol]);
-	if (tab == NULL) {
-		tab = kcalloc(RTM_NR_MSGTYPES, sizeof(*tab), GFP_KERNEL);
-		if (tab == NULL)
-			return -ENOBUFS;
+	if (WARN_ONCE(rtnl_msg_handler[protocol],
+		      "Double registration for protocol: %d\n", protcol))
+		return -EEXIST;
 
-		rcu_assign_pointer(rtnl_msg_handlers[protocol], tab);
-	}
+	lockdep_assert_held(&rtnl_mutex);
+
+	tab = kcalloc(RTM_NR_MSGTYPES, sizeof(*tab), GFP_KERNEL);
+	if (tab == NULL)
+		return -ENOBUFS;
 
 	if (doit)
 		tab[msgindex].doit = doit;
@@ -187,6 +189,8 @@ int __rtnl_register(int protocol, int msgtype,
 		tab[msgindex].dumpit = dumpit;
 	tab[msgindex].flags |= flags;
 
+	rcu_assign_pointer(rtnl_msg_handlers[protocol], tab);
+
 	return 0;
 }
 EXPORT_SYMBOL_GPL(__rtnl_register);
@@ -227,15 +231,13 @@ int rtnl_unregister(int protocol, int msgtype)
 	msgindex = rtm_msgindex(msgtype);
 
 	rtnl_lock();
-	handlers = rtnl_dereference(rtnl_msg_handlers[protocol]);
+	handlers = rtnl_msg_handlers[protocol];
 	if (!handlers) {
 		rtnl_unlock();
 		return -ENOENT;
 	}
-
-	handlers[msgindex].doit = NULL;
-	handlers[msgindex].dumpit = NULL;
-	handlers[msgindex].flags = 0;
+	rcu_assign_pointer(rtnl_msg_handler[protocol], NULL);
+	kfree_rcu(handlers, rcu);
 	rtnl_unlock();
 
 	return 0;
Powered by blists - more mailing lists
 
