[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20171107092458.GF3857@worktop>
Date: Tue, 7 Nov 2017 10:24:58 +0100
From: Peter Zijlstra <peterz@...radead.org>
To: Florian Westphal <fw@...len.de>
Cc: netdev@...r.kernel.org
Subject: Re: [PATCH net-next 2/8] rtnetlink: add rtnl_register_module
On Tue, Nov 07, 2017 at 10:10:04AM +0100, Peter Zijlstra wrote:
> On Tue, Nov 07, 2017 at 07:11:56AM +0100, Florian Westphal wrote:
> > Peter Zijlstra <peterz@...radead.org> wrote:
> > > On Mon, Nov 06, 2017 at 11:51:07AM +0100, Florian Westphal wrote:
> > > > @@ -180,6 +164,12 @@ int __rtnl_register(int protocol, int msgtype,
> > > > rcu_assign_pointer(rtnl_msg_handlers[protocol], tab);
> > > > }
> > > >
> > > > + WARN_ON(tab[msgindex].owner && tab[msgindex].owner != owner);
> > > > +
> > > > + tab[msgindex].owner = owner;
> > > > + /* make sure owner is always visible first */
> > > > + smp_wmb();
> > > > +
> > > > if (doit)
> > > > tab[msgindex].doit = doit;
> > > > if (dumpit)
> > >
> > > > @@ -235,6 +279,9 @@ int rtnl_unregister(int protocol, int msgtype)
> > > > handlers[msgindex].doit = NULL;
> > > > handlers[msgindex].dumpit = NULL;
> > > > handlers[msgindex].flags = 0;
> > > > + /* make sure we clear owner last */
> > > > + smp_wmb();
> > > > + handlers[msgindex].owner = NULL;
> > > > rtnl_unlock();
> > > >
> > > > return 0;
> > >
> > > These wmb()'s don't make sense; and the comments are incomplete. What do
> > > they pair with? Who cares about this ordering?
> >
> > rtnetlink_rcv_msg:
> >
> > 4406 dumpit = READ_ONCE(handlers[type].dumpit);
> > 4407 if (!dumpit)
> > 4408 goto err_unlock;
> > 4409 owner = READ_ONCE(handlers[type].owner);
>
> So what stops the CPU from hoisting this load before the dumpit load?
>
> > 4410 }
> > ..
> > 4417 if (!try_module_get(owner))
> > 4418 err = -EPROTONOSUPPORT;
> > 4419
> >
> > I don't want dumpit function address to be visible before owner.
> > Does that make sense?
>
> And no. That's insane, how can it ever observe an incomplete tab in the
> first place.
>
> The problem is that __rtnl_register() and rtnl_unregister are broken.
>
> __rtnl_register() publishes the tab before it initializes it; allowing
> people to observe the thing incomplete.
>
> Also, are we required to hold rtnl_lock() across __rtnl_register()? I'd
> hope so, otherwise what stops concurrent allocations and leaking of tab?
>
> Also, rtnl_register() doesn't seen to employ rtnl_lock() and panic()
> WTF?!
>
> rtnl_unregister() should then RCU free the tab.
>
> None of that is happening, so what is that RCU stuff supposed to do?
Something like the below would go some way toward sanitizing this stuff;
rcu_assign_pointer() is a store-release, meaning it happens after
everything coming before.
Therefore, when you observe that tab (through rcu_dereference) you're
guaranteed to see the thing initialized. The memory ordering on the
consume side is through an address dependency; we need to have completed
the load of the tab pointer before we can compute the address of its
members and load from there, these are not things a CPU is allowed to
reorder (lets forget about Alpha).
Quite possibly, if rtnl_unregister() is called from module unload, this
is broken; in that case we'd need something like:
rcu_assign_pointer(rtnl_msg_handler[protocol], NULL);
/*
* Ensure nobody can still observe our old protocol handler
* before continuing to free the module that includes the
* functions called from it.
*/
synchronize_rcu();
---
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 5ace48926b19..25391c7b9c5d 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -63,6 +63,7 @@ struct rtnl_link {
rtnl_doit_func doit;
rtnl_dumpit_func dumpit;
unsigned int flags;
+ struct rcu_head rcu;
};
static DEFINE_MUTEX(rtnl_mutex);
@@ -172,14 +173,15 @@ int __rtnl_register(int protocol, int msgtype,
BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX);
msgindex = rtm_msgindex(msgtype);
- tab = rcu_dereference_raw(rtnl_msg_handlers[protocol]);
- if (tab == NULL) {
- tab = kcalloc(RTM_NR_MSGTYPES, sizeof(*tab), GFP_KERNEL);
- if (tab == NULL)
- return -ENOBUFS;
+ if (WARN_ONCE(rtnl_msg_handler[protocol],
+ "Double registration for protocol: %d\n", protcol))
+ return -EEXIST;
- rcu_assign_pointer(rtnl_msg_handlers[protocol], tab);
- }
+ lockdep_assert_held(&rtnl_mutex);
+
+ tab = kcalloc(RTM_NR_MSGTYPES, sizeof(*tab), GFP_KERNEL);
+ if (tab == NULL)
+ return -ENOBUFS;
if (doit)
tab[msgindex].doit = doit;
@@ -187,6 +189,8 @@ int __rtnl_register(int protocol, int msgtype,
tab[msgindex].dumpit = dumpit;
tab[msgindex].flags |= flags;
+ rcu_assign_pointer(rtnl_msg_handlers[protocol], tab);
+
return 0;
}
EXPORT_SYMBOL_GPL(__rtnl_register);
@@ -227,15 +231,13 @@ int rtnl_unregister(int protocol, int msgtype)
msgindex = rtm_msgindex(msgtype);
rtnl_lock();
- handlers = rtnl_dereference(rtnl_msg_handlers[protocol]);
+ handlers = rtnl_msg_handlers[protocol];
if (!handlers) {
rtnl_unlock();
return -ENOENT;
}
-
- handlers[msgindex].doit = NULL;
- handlers[msgindex].dumpit = NULL;
- handlers[msgindex].flags = 0;
+ rcu_assign_pointer(rtnl_msg_handler[protocol], NULL);
+ kfree_rcu(handlers, rcu);
rtnl_unlock();
return 0;
Powered by blists - more mailing lists