[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20220826170315.96700-1-kuniyu@amazon.com>
Date: Fri, 26 Aug 2022 10:03:15 -0700
From: Kuniyuki Iwashima <kuniyu@...zon.com>
To: <edumazet@...gle.com>
CC: <chuck.lever@...cle.com>, <davem@...emloft.net>,
<jlayton@...nel.org>, <keescook@...omium.org>, <kuba@...nel.org>,
<kuni1840@...il.com>, <kuniyu@...zon.com>,
<linux-fsdevel@...r.kernel.org>, <mcgrof@...nel.org>,
<netdev@...r.kernel.org>, <pabeni@...hat.com>, <yzaikin@...gle.com>
Subject: Re: [PATCH v1 net-next 04/13] net: Introduce init2() for pernet_operations.
From: Eric Dumazet <edumazet@...gle.com>
Date: Fri, 26 Aug 2022 08:20:06 -0700
> On Thu, Aug 25, 2022 at 5:06 PM Kuniyuki Iwashima <kuniyu@...zon.com> wrote:
> >
> > This patch adds a new init function for pernet_operations, init2().
>
> Why ?
>
> This seems not really needed...
>
> TCP ops->init can trivially reach the parent net_ns if needed,
> because the current process is the one doing the creation of a new net_ns.
Yes, it's true because IPv4 TCP/UDP are both unloadable.
At first, I was thinking of a general interface, but I'm fine
to drop this patch and access current->nsproxy->net_ns like
sysctl_devconf_inherit_init_net does.
>
> >
> > We call each init2() during clone() or unshare() only, where we can
> > access the parent netns for a child netns creation.
> >
> > Signed-off-by: Kuniyuki Iwashima <kuniyu@...zon.com>
> > ---
> > include/net/net_namespace.h | 3 +++
> > net/core/net_namespace.c | 18 +++++++++++-------
> > 2 files changed, 14 insertions(+), 7 deletions(-)
> >
> > diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
> > index 8c3587d5c308..3ca426649756 100644
> > --- a/include/net/net_namespace.h
> > +++ b/include/net/net_namespace.h
> > @@ -410,6 +410,8 @@ struct pernet_operations {
> > * from register_pernet_subsys(), unregister_pernet_subsys()
> > * register_pernet_device() and unregister_pernet_device().
> > *
> > + * init2() is called during clone() or unshare() only.
> > + *
> > * Exit methods using blocking RCU primitives, such as
> > * synchronize_rcu(), should be implemented via exit_batch.
> > * Then, destruction of a group of net requires single
> > @@ -422,6 +424,7 @@ struct pernet_operations {
> > * the calls.
> > */
> > int (*init)(struct net *net);
> > + int (*init2)(struct net *net, struct net *old_net);
> > void (*pre_exit)(struct net *net);
> > void (*exit)(struct net *net);
> > void (*exit_batch)(struct list_head *net_exit_list);
> > diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
> > index 6b9f19122ec1..b120ff97d9f5 100644
> > --- a/net/core/net_namespace.c
> > +++ b/net/core/net_namespace.c
> > @@ -116,7 +116,8 @@ static int net_assign_generic(struct net *net, unsigned int id, void *data)
> > return 0;
> > }
> >
> > -static int ops_init(const struct pernet_operations *ops, struct net *net)
> > +static int ops_init(const struct pernet_operations *ops,
> > + struct net *net, struct net *old_net)
> > {
> > int err = -ENOMEM;
> > void *data = NULL;
> > @@ -133,6 +134,8 @@ static int ops_init(const struct pernet_operations *ops, struct net *net)
> > err = 0;
> > if (ops->init)
> > err = ops->init(net);
> > + if (!err && ops->init2 && old_net)
> > + err = ops->init2(net, old_net);
>
> If an error comes here, while ops->init() was a success, we probably
> leave things in a bad state (memory leak ?)
Somehow I thought .exit() should handle the case, yes, it's really bad
design... at least I should have added .exit2().
I'll drop this in v2.
Thank you!
>
> > if (!err)
> > return 0;
> >
> > @@ -301,7 +304,8 @@ EXPORT_SYMBOL_GPL(get_net_ns_by_id);
> > /*
> > * setup_net runs the initializers for the network namespace object.
> > */
> > -static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
> > +static __net_init int setup_net(struct net *net, struct net *old_net,
> > + struct user_namespace *user_ns)
> > {
> > /* Must be called with pernet_ops_rwsem held */
> > const struct pernet_operations *ops, *saved_ops;
> > @@ -323,7 +327,7 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
> > mutex_init(&net->ipv4.ra_mutex);
> >
> > list_for_each_entry(ops, &pernet_list, list) {
> > - error = ops_init(ops, net);
> > + error = ops_init(ops, net, old_net);
> > if (error < 0)
> > goto out_undo;
> > }
> > @@ -469,7 +473,7 @@ struct net *copy_net_ns(unsigned long flags,
> > if (rv < 0)
> > goto put_userns;
> >
> > - rv = setup_net(net, user_ns);
> > + rv = setup_net(net, old_net, user_ns);
> >
> > up_read(&pernet_ops_rwsem);
> >
> > @@ -1107,7 +1111,7 @@ void __init net_ns_init(void)
> > init_net.key_domain = &init_net_key_domain;
> > #endif
> > down_write(&pernet_ops_rwsem);
> > - if (setup_net(&init_net, &init_user_ns))
> > + if (setup_net(&init_net, NULL, &init_user_ns))
> > panic("Could not setup the initial network namespace");
> >
> > init_net_initialized = true;
> > @@ -1148,7 +1152,7 @@ static int __register_pernet_operations(struct list_head *list,
> >
> > memcg = mem_cgroup_or_root(get_mem_cgroup_from_obj(net));
> > old = set_active_memcg(memcg);
> > - error = ops_init(ops, net);
> > + error = ops_init(ops, net, NULL);
> > set_active_memcg(old);
> > mem_cgroup_put(memcg);
> > if (error)
> > @@ -1188,7 +1192,7 @@ static int __register_pernet_operations(struct list_head *list,
> > return 0;
> > }
> >
> > - return ops_init(ops, &init_net);
> > + return ops_init(ops, &init_net, NULL);
> > }
> >
> > static void __unregister_pernet_operations(struct pernet_operations *ops)
> > --
> > 2.30.2
> >
Powered by blists - more mailing lists