diff mbox series

[v1,net-next,04/13] net: Introduce init2() for pernet_operations.

Message ID 20220826000445.46552-5-kuniyu@amazon.com (mailing list archive)
State New, archived
Headers show
Series tcp/udp: Introduce optional per-netns hash table. | expand

Commit Message

Kuniyuki Iwashima Aug. 26, 2022, 12:04 a.m. UTC
This patch adds a new init function for pernet_operations, init2().

We call each init2() during clone() or unshare() only, where we can
access the parent netns for a child netns creation.

Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
---
 include/net/net_namespace.h |  3 +++
 net/core/net_namespace.c    | 18 +++++++++++-------
 2 files changed, 14 insertions(+), 7 deletions(-)

Comments

Eric Dumazet Aug. 26, 2022, 3:20 p.m. UTC | #1
On Thu, Aug 25, 2022 at 5:06 PM Kuniyuki Iwashima <kuniyu@amazon.com> wrote:
>
> This patch adds a new init function for pernet_operations, init2().

Why ?

This seems not really needed...

TCP ops->init can trivially reach the parent net_ns if needed,
because the current process is the one doing the creation of a new net_ns.

>
> We call each init2() during clone() or unshare() only, where we can
> access the parent netns for a child netns creation.
>
> Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
> ---
>  include/net/net_namespace.h |  3 +++
>  net/core/net_namespace.c    | 18 +++++++++++-------
>  2 files changed, 14 insertions(+), 7 deletions(-)
>
> diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
> index 8c3587d5c308..3ca426649756 100644
> --- a/include/net/net_namespace.h
> +++ b/include/net/net_namespace.h
> @@ -410,6 +410,8 @@ struct pernet_operations {
>          * from register_pernet_subsys(), unregister_pernet_subsys()
>          * register_pernet_device() and unregister_pernet_device().
>          *
> +        * init2() is called during clone() or unshare() only.
> +        *
>          * Exit methods using blocking RCU primitives, such as
>          * synchronize_rcu(), should be implemented via exit_batch.
>          * Then, destruction of a group of net requires single
> @@ -422,6 +424,7 @@ struct pernet_operations {
>          * the calls.
>          */
>         int (*init)(struct net *net);
> +       int (*init2)(struct net *net, struct net *old_net);
>         void (*pre_exit)(struct net *net);
>         void (*exit)(struct net *net);
>         void (*exit_batch)(struct list_head *net_exit_list);
> diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
> index 6b9f19122ec1..b120ff97d9f5 100644
> --- a/net/core/net_namespace.c
> +++ b/net/core/net_namespace.c
> @@ -116,7 +116,8 @@ static int net_assign_generic(struct net *net, unsigned int id, void *data)
>         return 0;
>  }
>
> -static int ops_init(const struct pernet_operations *ops, struct net *net)
> +static int ops_init(const struct pernet_operations *ops,
> +                   struct net *net, struct net *old_net)
>  {
>         int err = -ENOMEM;
>         void *data = NULL;
> @@ -133,6 +134,8 @@ static int ops_init(const struct pernet_operations *ops, struct net *net)
>         err = 0;
>         if (ops->init)
>                 err = ops->init(net);
> +       if (!err && ops->init2 && old_net)
> +               err = ops->init2(net, old_net);

If an error comes here, while ops->init() was a success, we probably
leave things in a bad state (memory leak ?)

>         if (!err)
>                 return 0;
>
> @@ -301,7 +304,8 @@ EXPORT_SYMBOL_GPL(get_net_ns_by_id);
>  /*
>   * setup_net runs the initializers for the network namespace object.
>   */
> -static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
> +static __net_init int setup_net(struct net *net, struct net *old_net,
> +                               struct user_namespace *user_ns)
>  {
>         /* Must be called with pernet_ops_rwsem held */
>         const struct pernet_operations *ops, *saved_ops;
> @@ -323,7 +327,7 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
>         mutex_init(&net->ipv4.ra_mutex);
>
>         list_for_each_entry(ops, &pernet_list, list) {
> -               error = ops_init(ops, net);
> +               error = ops_init(ops, net, old_net);
>                 if (error < 0)
>                         goto out_undo;
>         }
> @@ -469,7 +473,7 @@ struct net *copy_net_ns(unsigned long flags,
>         if (rv < 0)
>                 goto put_userns;
>
> -       rv = setup_net(net, user_ns);
> +       rv = setup_net(net, old_net, user_ns);
>
>         up_read(&pernet_ops_rwsem);
>
> @@ -1107,7 +1111,7 @@ void __init net_ns_init(void)
>         init_net.key_domain = &init_net_key_domain;
>  #endif
>         down_write(&pernet_ops_rwsem);
> -       if (setup_net(&init_net, &init_user_ns))
> +       if (setup_net(&init_net, NULL, &init_user_ns))
>                 panic("Could not setup the initial network namespace");
>
>         init_net_initialized = true;
> @@ -1148,7 +1152,7 @@ static int __register_pernet_operations(struct list_head *list,
>
>                         memcg = mem_cgroup_or_root(get_mem_cgroup_from_obj(net));
>                         old = set_active_memcg(memcg);
> -                       error = ops_init(ops, net);
> +                       error = ops_init(ops, net, NULL);
>                         set_active_memcg(old);
>                         mem_cgroup_put(memcg);
>                         if (error)
> @@ -1188,7 +1192,7 @@ static int __register_pernet_operations(struct list_head *list,
>                 return 0;
>         }
>
> -       return ops_init(ops, &init_net);
> +       return ops_init(ops, &init_net, NULL);
>  }
>
>  static void __unregister_pernet_operations(struct pernet_operations *ops)
> --
> 2.30.2
>
Kuniyuki Iwashima Aug. 26, 2022, 5:03 p.m. UTC | #2
From:   Eric Dumazet <edumazet@google.com>
Date:   Fri, 26 Aug 2022 08:20:06 -0700
> On Thu, Aug 25, 2022 at 5:06 PM Kuniyuki Iwashima <kuniyu@amazon.com> wrote:
> >
> > This patch adds a new init function for pernet_operations, init2().
> 
> Why ?
> 
> This seems not really needed...
> 
> TCP ops->init can trivially reach the parent net_ns if needed,
> because the current process is the one doing the creation of a new net_ns.

Yes, it's true because IPv4 TCP/UDP are both unloadable.

At first, I was thinking of a general interface, but I'm fine
to drop this patch and access current->nsproxy->net_ns like
sysctl_devconf_inherit_init_net does.


> 
> >
> > We call each init2() during clone() or unshare() only, where we can
> > access the parent netns for a child netns creation.
> >
> > Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
> > ---
> >  include/net/net_namespace.h |  3 +++
> >  net/core/net_namespace.c    | 18 +++++++++++-------
> >  2 files changed, 14 insertions(+), 7 deletions(-)
> >
> > diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
> > index 8c3587d5c308..3ca426649756 100644
> > --- a/include/net/net_namespace.h
> > +++ b/include/net/net_namespace.h
> > @@ -410,6 +410,8 @@ struct pernet_operations {
> >          * from register_pernet_subsys(), unregister_pernet_subsys()
> >          * register_pernet_device() and unregister_pernet_device().
> >          *
> > +        * init2() is called during clone() or unshare() only.
> > +        *
> >          * Exit methods using blocking RCU primitives, such as
> >          * synchronize_rcu(), should be implemented via exit_batch.
> >          * Then, destruction of a group of net requires single
> > @@ -422,6 +424,7 @@ struct pernet_operations {
> >          * the calls.
> >          */
> >         int (*init)(struct net *net);
> > +       int (*init2)(struct net *net, struct net *old_net);
> >         void (*pre_exit)(struct net *net);
> >         void (*exit)(struct net *net);
> >         void (*exit_batch)(struct list_head *net_exit_list);
> > diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
> > index 6b9f19122ec1..b120ff97d9f5 100644
> > --- a/net/core/net_namespace.c
> > +++ b/net/core/net_namespace.c
> > @@ -116,7 +116,8 @@ static int net_assign_generic(struct net *net, unsigned int id, void *data)
> >         return 0;
> >  }
> >
> > -static int ops_init(const struct pernet_operations *ops, struct net *net)
> > +static int ops_init(const struct pernet_operations *ops,
> > +                   struct net *net, struct net *old_net)
> >  {
> >         int err = -ENOMEM;
> >         void *data = NULL;
> > @@ -133,6 +134,8 @@ static int ops_init(const struct pernet_operations *ops, struct net *net)
> >         err = 0;
> >         if (ops->init)
> >                 err = ops->init(net);
> > +       if (!err && ops->init2 && old_net)
> > +               err = ops->init2(net, old_net);
> 
> If an error comes here, while ops->init() was a success, we probably
> leave things in a bad state (memory leak ?)

Somehow I thought .exit() should handle the case, yes, it's really bad
design... at least I should have added .exit2().

I'll drop this in v2.

Thank you!


> 
> >         if (!err)
> >                 return 0;
> >
> > @@ -301,7 +304,8 @@ EXPORT_SYMBOL_GPL(get_net_ns_by_id);
> >  /*
> >   * setup_net runs the initializers for the network namespace object.
> >   */
> > -static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
> > +static __net_init int setup_net(struct net *net, struct net *old_net,
> > +                               struct user_namespace *user_ns)
> >  {
> >         /* Must be called with pernet_ops_rwsem held */
> >         const struct pernet_operations *ops, *saved_ops;
> > @@ -323,7 +327,7 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
> >         mutex_init(&net->ipv4.ra_mutex);
> >
> >         list_for_each_entry(ops, &pernet_list, list) {
> > -               error = ops_init(ops, net);
> > +               error = ops_init(ops, net, old_net);
> >                 if (error < 0)
> >                         goto out_undo;
> >         }
> > @@ -469,7 +473,7 @@ struct net *copy_net_ns(unsigned long flags,
> >         if (rv < 0)
> >                 goto put_userns;
> >
> > -       rv = setup_net(net, user_ns);
> > +       rv = setup_net(net, old_net, user_ns);
> >
> >         up_read(&pernet_ops_rwsem);
> >
> > @@ -1107,7 +1111,7 @@ void __init net_ns_init(void)
> >         init_net.key_domain = &init_net_key_domain;
> >  #endif
> >         down_write(&pernet_ops_rwsem);
> > -       if (setup_net(&init_net, &init_user_ns))
> > +       if (setup_net(&init_net, NULL, &init_user_ns))
> >                 panic("Could not setup the initial network namespace");
> >
> >         init_net_initialized = true;
> > @@ -1148,7 +1152,7 @@ static int __register_pernet_operations(struct list_head *list,
> >
> >                         memcg = mem_cgroup_or_root(get_mem_cgroup_from_obj(net));
> >                         old = set_active_memcg(memcg);
> > -                       error = ops_init(ops, net);
> > +                       error = ops_init(ops, net, NULL);
> >                         set_active_memcg(old);
> >                         mem_cgroup_put(memcg);
> >                         if (error)
> > @@ -1188,7 +1192,7 @@ static int __register_pernet_operations(struct list_head *list,
> >                 return 0;
> >         }
> >
> > -       return ops_init(ops, &init_net);
> > +       return ops_init(ops, &init_net, NULL);
> >  }
> >
> >  static void __unregister_pernet_operations(struct pernet_operations *ops)
> > --
> > 2.30.2
> >
diff mbox series

Patch

diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 8c3587d5c308..3ca426649756 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -410,6 +410,8 @@  struct pernet_operations {
 	 * from register_pernet_subsys(), unregister_pernet_subsys()
 	 * register_pernet_device() and unregister_pernet_device().
 	 *
+	 * init2() is called during clone() or unshare() only.
+	 *
 	 * Exit methods using blocking RCU primitives, such as
 	 * synchronize_rcu(), should be implemented via exit_batch.
 	 * Then, destruction of a group of net requires single
@@ -422,6 +424,7 @@  struct pernet_operations {
 	 * the calls.
 	 */
 	int (*init)(struct net *net);
+	int (*init2)(struct net *net, struct net *old_net);
 	void (*pre_exit)(struct net *net);
 	void (*exit)(struct net *net);
 	void (*exit_batch)(struct list_head *net_exit_list);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 6b9f19122ec1..b120ff97d9f5 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -116,7 +116,8 @@  static int net_assign_generic(struct net *net, unsigned int id, void *data)
 	return 0;
 }
 
-static int ops_init(const struct pernet_operations *ops, struct net *net)
+static int ops_init(const struct pernet_operations *ops,
+		    struct net *net, struct net *old_net)
 {
 	int err = -ENOMEM;
 	void *data = NULL;
@@ -133,6 +134,8 @@  static int ops_init(const struct pernet_operations *ops, struct net *net)
 	err = 0;
 	if (ops->init)
 		err = ops->init(net);
+	if (!err && ops->init2 && old_net)
+		err = ops->init2(net, old_net);
 	if (!err)
 		return 0;
 
@@ -301,7 +304,8 @@  EXPORT_SYMBOL_GPL(get_net_ns_by_id);
 /*
  * setup_net runs the initializers for the network namespace object.
  */
-static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
+static __net_init int setup_net(struct net *net, struct net *old_net,
+				struct user_namespace *user_ns)
 {
 	/* Must be called with pernet_ops_rwsem held */
 	const struct pernet_operations *ops, *saved_ops;
@@ -323,7 +327,7 @@  static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
 	mutex_init(&net->ipv4.ra_mutex);
 
 	list_for_each_entry(ops, &pernet_list, list) {
-		error = ops_init(ops, net);
+		error = ops_init(ops, net, old_net);
 		if (error < 0)
 			goto out_undo;
 	}
@@ -469,7 +473,7 @@  struct net *copy_net_ns(unsigned long flags,
 	if (rv < 0)
 		goto put_userns;
 
-	rv = setup_net(net, user_ns);
+	rv = setup_net(net, old_net, user_ns);
 
 	up_read(&pernet_ops_rwsem);
 
@@ -1107,7 +1111,7 @@  void __init net_ns_init(void)
 	init_net.key_domain = &init_net_key_domain;
 #endif
 	down_write(&pernet_ops_rwsem);
-	if (setup_net(&init_net, &init_user_ns))
+	if (setup_net(&init_net, NULL, &init_user_ns))
 		panic("Could not setup the initial network namespace");
 
 	init_net_initialized = true;
@@ -1148,7 +1152,7 @@  static int __register_pernet_operations(struct list_head *list,
 
 			memcg = mem_cgroup_or_root(get_mem_cgroup_from_obj(net));
 			old = set_active_memcg(memcg);
-			error = ops_init(ops, net);
+			error = ops_init(ops, net, NULL);
 			set_active_memcg(old);
 			mem_cgroup_put(memcg);
 			if (error)
@@ -1188,7 +1192,7 @@  static int __register_pernet_operations(struct list_head *list,
 		return 0;
 	}
 
-	return ops_init(ops, &init_net);
+	return ops_init(ops, &init_net, NULL);
 }
 
 static void __unregister_pernet_operations(struct pernet_operations *ops)