diff mbox series

[03/10] teach move_mount(2) to work with OPEN_TREE_CLONE

Message ID 155059612649.17079.7287713053194562461.stgit@warthog.procyon.org.uk (mailing list archive)
State New, archived
Headers show
Series VFS: Provide new mount UAPI | expand

Commit Message

David Howells Feb. 19, 2019, 5:08 p.m. UTC
Allow a detached tree created by open_tree(..., OPEN_TREE_CLONE) to be
attached by move_mount(2).

If, by the time of the final fput() of an OPEN_TREE_CLONE-opened file, its
tree is no longer detached, it won't be dissolved.  move_mount(2) is adjusted
to handle a detached source.

That gives us equivalents of mount --bind and mount --rbind.

Thanks also to Alan Jenkins <alan.christopher.jenkins@gmail.com> for
providing a whole bunch of ways to break things using this interface.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---

 fs/namespace.c |   62 ++++++++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 55 insertions(+), 7 deletions(-)

Comments

Alan Jenkins Feb. 20, 2019, 6:59 p.m. UTC | #1
On 19/02/2019 17:08, David Howells wrote:
> Allow a detached tree created by open_tree(..., OPEN_TREE_CLONE) to be
> attached by move_mount(2).
>
> If by the time of final fput() of OPEN_TREE_CLONE-opened file its tree is
> not detached anymore, it won't be dissolved.  move_mount(2) is adjusted
> to handle detached source.
>
> That gives us equivalents of mount --bind and mount --rbind.
> Thanks also to Alan Jenkins<alan.christopher.jenkins@gmail.com>  for
> providing a whole bunch of ways to break things using this interface.
>
> Signed-off-by: Al Viro<viro@zeniv.linux.org.uk>
> Signed-off-by: David Howells<dhowells@redhat.com>
> Signed-off-by: Al Viro<viro@zeniv.linux.org.uk>
> ---
>
>   fs/namespace.c |   62 ++++++++++++++++++++++++++++++++++++++++++++++++++------
>   1 file changed, 55 insertions(+), 7 deletions(-)
>
> diff --git a/fs/namespace.c b/fs/namespace.c
> index f10122028a11..56423c60ac7e 100644
> --- a/fs/namespace.c
> +++ b/fs/namespace.c
> @@ -1840,10 +1840,16 @@ void dissolve_on_fput(struct vfsmount *mnt)
>   	namespace_lock();
>   	lock_mount_hash();
>   	ns = real_mount(mnt)->mnt_ns;
> -	umount_tree(real_mount(mnt), UMOUNT_CONNECTED);
> +	if (ns) {
> +		if (is_anon_ns(ns))
> +			umount_tree(real_mount(mnt), UMOUNT_CONNECTED);
> +		else
> +			ns = NULL;
> +	}
>   	unlock_mount_hash();
>   	namespace_unlock();
> -	free_mnt_ns(ns);
> +	if (ns)
> +		free_mnt_ns(ns);
>   }
>   
>   void drop_collected_mounts(struct vfsmount *mnt)
> @@ -2079,6 +2085,10 @@ static int attach_recursive_mnt(struct mount *source_mnt,
>   		attach_mnt(source_mnt, dest_mnt, dest_mp);
>   		touch_mnt_namespace(source_mnt->mnt_ns);
>   	} else {
> +		if (source_mnt->mnt_ns) {
> +			/* move from anon - the caller will destroy */
> +			list_del_init(&source_mnt->mnt_ns->list);
> +		}
>   		mnt_set_mountpoint(dest_mnt, dest_mp, source_mnt);
>   		commit_tree(source_mnt);
>   	}
> @@ -2537,13 +2547,37 @@ static inline int tree_contains_unbindable(struct mount *mnt)
>   	return 0;
>   }
>   
> +/*
> + * Check that there aren't references to earlier/same mount namespaces in the
> + * specified subtree.  Such references can act as pins for mount namespaces
> + * that aren't checked by the mount-cycle checking code, thereby allowing
> + * cycles to be made.
> + */
> +static bool check_for_nsfs_mounts(struct mount *subtree)
> +{
> +	struct mount *p;
> +	bool ret = false;
> +
> +	lock_mount_hash();
> +	for (p = subtree; p; p = next_mnt(p, subtree))
> +		if (mnt_ns_loop(p->mnt.mnt_root))
> +			goto out;
> +
> +	ret = true;
> +out:
> +	unlock_mount_hash();
> +	return ret;
> +}
> +
>   static int do_move_mount(struct path *old_path, struct path *new_path)
>   {
>   	struct path parent_path = {.mnt = NULL, .dentry = NULL};
> +	struct mnt_namespace *ns;
>   	struct mount *p;
>   	struct mount *old;
>   	struct mountpoint *mp;
>   	int err;
> +	bool attached;
>   
>   	mp = lock_mount(new_path);
>   	if (IS_ERR(mp))
> @@ -2551,12 +2585,19 @@ static int do_move_mount(struct path *old_path, struct path *new_path)
>   
>   	old = real_mount(old_path->mnt);
>   	p = real_mount(new_path->mnt);
> +	attached = mnt_has_parent(old);
> +	ns = old->mnt_ns;
>   
>   	err = -EINVAL;
> -	if (!check_mnt(p) || !check_mnt(old))
> +	/* The mountpoint must be in our namespace. */
> +	if (!check_mnt(p))
>   		goto out;
>   
> -	if (!mnt_has_parent(old))
> +	/* The thing moved should be either ours or completely unattached. */
> +	if (attached && !check_mnt(old))
> +		goto out;
> +
> +	if (!attached && !is_anon_ns(ns))

I think this is missing a check for ns != NULL before passing it to 
is_anon_ns().

E.g. in the case where I called umount2(old_path, MNT_DETACH) beforehand.

>   		goto out;
>   
>   	if (old->mnt.mnt_flags & MNT_LOCKED)
> @@ -2571,7 +2612,7 @@ static int do_move_mount(struct path *old_path, struct path *new_path)
>   	/*
>   	 * Don't move a mount residing in a shared parent.
>   	 */
> -	if (IS_MNT_SHARED(old->mnt_parent))
> +	if (attached && IS_MNT_SHARED(old->mnt_parent))
>   		goto out;
>   	/*
>   	 * Don't move a mount tree containing unbindable mounts to a destination
> @@ -2580,12 +2621,14 @@ static int do_move_mount(struct path *old_path, struct path *new_path)
>   	if (IS_MNT_SHARED(p) && tree_contains_unbindable(old))
>   		goto out;
>   	err = -ELOOP;
> +	if (!check_for_nsfs_mounts(old))
> +		goto out;
>   	for (; mnt_has_parent(p); p = p->mnt_parent)
>   		if (p == old)
>   			goto out;
>   
>   	err = attach_recursive_mnt(old, real_mount(new_path->mnt), mp,
> -				   &parent_path);
> +				   attached ? &parent_path : NULL);
>   	if (err)
>   		goto out;
>   
> @@ -2594,8 +2637,11 @@ static int do_move_mount(struct path *old_path, struct path *new_path)
>   	list_del_init(&old->mnt_expire);
>   out:
>   	unlock_mount(mp);
> -	if (!err)
> +	if (!err) {
>   		path_put(&parent_path);
> +		if (!attached)
> +			free_mnt_ns(ns);
> +	}
>   	return err;
>   }
>   
> @@ -3289,6 +3335,8 @@ SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
>   
>   /*
>    * Move a mount from one place to another.
> + * In combination with open_tree(OPEN_TREE_CLONE [| AT_RECURSIVE]) it can be
> + * used to copy a mount subtree.
>    *
>    * Note the flags value is a combination of MOVE_MOUNT_* flags.
>    */
>
>
Alan Jenkins Feb. 26, 2019, 5:45 p.m. UTC | #2
On 19/02/2019 17:08, David Howells wrote:
> Allow a detached tree created by open_tree(..., OPEN_TREE_CLONE) to be
> attached by move_mount(2).
>
> If by the time of final fput() of OPEN_TREE_CLONE-opened file its tree is
> not detached anymore, it won't be dissolved.  move_mount(2) is adjusted
> to handle detached source.
>
> That gives us equivalents of mount --bind and mount --rbind.

This is a bit ambiguous.  The two cases can be understood by analogy to 
bind / rbind.  But it also seems natural to think it could be used 
to implement exactly the same thing as the current `mount --bind` / 
`--rbind`.  I think it *does* now provide full equivalence, right?

I was thinking about the case where mount propagation is enabled on the 
source tree, i.e. it is not a private mount.  Suppose a new mount is 
added inside the source tree, between open_tree() and move_mount().

In the previous version of the patch series, Eric suggested there was a 
NULL dereference in this scenario.[1]  This version should be safe.  I 
think the new mount will be propagated to the cloned tree. Furthermore - 
due to the way this version uses a temporary mount namespace - the 
propagated version of the mount will not be locked by 
attach_recursive_mnt().

[1] https://lore.kernel.org/lkml/87bm7n5k1r.fsf@xmission.com/

It looks very neat now, with the use of the temporary namespaces. 
Congratulations :-).  I have finished looking through these patches 1-3 now.

> Thanks also to Alan Jenkins<alan.christopher.jenkins@gmail.com>  for
> providing a whole bunch of ways to break things using this interface.
>
> Signed-off-by: Al Viro<viro@zeniv.linux.org.uk>
> Signed-off-by: David Howells<dhowells@redhat.com>
> Signed-off-by: Al Viro<viro@zeniv.linux.org.uk>
>
P.S. I guess Al does not need two Signed-off-by lines here.

Thanks
Alan
diff mbox series

Patch

diff --git a/fs/namespace.c b/fs/namespace.c
index f10122028a11..56423c60ac7e 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1840,10 +1840,16 @@  void dissolve_on_fput(struct vfsmount *mnt)
 	namespace_lock();
 	lock_mount_hash();
 	ns = real_mount(mnt)->mnt_ns;
-	umount_tree(real_mount(mnt), UMOUNT_CONNECTED);
+	if (ns) {
+		if (is_anon_ns(ns))
+			umount_tree(real_mount(mnt), UMOUNT_CONNECTED);
+		else
+			ns = NULL;
+	}
 	unlock_mount_hash();
 	namespace_unlock();
-	free_mnt_ns(ns);
+	if (ns)
+		free_mnt_ns(ns);
 }
 
 void drop_collected_mounts(struct vfsmount *mnt)
@@ -2079,6 +2085,10 @@  static int attach_recursive_mnt(struct mount *source_mnt,
 		attach_mnt(source_mnt, dest_mnt, dest_mp);
 		touch_mnt_namespace(source_mnt->mnt_ns);
 	} else {
+		if (source_mnt->mnt_ns) {
+			/* move from anon - the caller will destroy */
+			list_del_init(&source_mnt->mnt_ns->list);
+		}
 		mnt_set_mountpoint(dest_mnt, dest_mp, source_mnt);
 		commit_tree(source_mnt);
 	}
@@ -2537,13 +2547,37 @@  static inline int tree_contains_unbindable(struct mount *mnt)
 	return 0;
 }
 
+/*
+ * Check that there aren't references to earlier/same mount namespaces in the
+ * specified subtree.  Such references can act as pins for mount namespaces
+ * that aren't checked by the mount-cycle checking code, thereby allowing
+ * cycles to be made.
+ */
+static bool check_for_nsfs_mounts(struct mount *subtree)
+{
+	struct mount *p;
+	bool ret = false;
+
+	lock_mount_hash();
+	for (p = subtree; p; p = next_mnt(p, subtree))
+		if (mnt_ns_loop(p->mnt.mnt_root))
+			goto out;
+
+	ret = true;
+out:
+	unlock_mount_hash();
+	return ret;
+}
+
 static int do_move_mount(struct path *old_path, struct path *new_path)
 {
 	struct path parent_path = {.mnt = NULL, .dentry = NULL};
+	struct mnt_namespace *ns;
 	struct mount *p;
 	struct mount *old;
 	struct mountpoint *mp;
 	int err;
+	bool attached;
 
 	mp = lock_mount(new_path);
 	if (IS_ERR(mp))
@@ -2551,12 +2585,19 @@  static int do_move_mount(struct path *old_path, struct path *new_path)
 
 	old = real_mount(old_path->mnt);
 	p = real_mount(new_path->mnt);
+	attached = mnt_has_parent(old);
+	ns = old->mnt_ns;
 
 	err = -EINVAL;
-	if (!check_mnt(p) || !check_mnt(old))
+	/* The mountpoint must be in our namespace. */
+	if (!check_mnt(p))
 		goto out;
 
-	if (!mnt_has_parent(old))
+	/* The thing moved should be either ours or completely unattached. */
+	if (attached && !check_mnt(old))
+		goto out;
+
+	if (!attached && !is_anon_ns(ns))
 		goto out;
 
 	if (old->mnt.mnt_flags & MNT_LOCKED)
@@ -2571,7 +2612,7 @@  static int do_move_mount(struct path *old_path, struct path *new_path)
 	/*
 	 * Don't move a mount residing in a shared parent.
 	 */
-	if (IS_MNT_SHARED(old->mnt_parent))
+	if (attached && IS_MNT_SHARED(old->mnt_parent))
 		goto out;
 	/*
 	 * Don't move a mount tree containing unbindable mounts to a destination
@@ -2580,12 +2621,14 @@  static int do_move_mount(struct path *old_path, struct path *new_path)
 	if (IS_MNT_SHARED(p) && tree_contains_unbindable(old))
 		goto out;
 	err = -ELOOP;
+	if (!check_for_nsfs_mounts(old))
+		goto out;
 	for (; mnt_has_parent(p); p = p->mnt_parent)
 		if (p == old)
 			goto out;
 
 	err = attach_recursive_mnt(old, real_mount(new_path->mnt), mp,
-				   &parent_path);
+				   attached ? &parent_path : NULL);
 	if (err)
 		goto out;
 
@@ -2594,8 +2637,11 @@  static int do_move_mount(struct path *old_path, struct path *new_path)
 	list_del_init(&old->mnt_expire);
 out:
 	unlock_mount(mp);
-	if (!err)
+	if (!err) {
 		path_put(&parent_path);
+		if (!attached)
+			free_mnt_ns(ns);
+	}
 	return err;
 }
 
@@ -3289,6 +3335,8 @@  SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
 
 /*
  * Move a mount from one place to another.
+ * In combination with open_tree(OPEN_TREE_CLONE [| AT_RECURSIVE]) it can be
+ * used to copy a mount subtree.
  *
  * Note the flags value is a combination of MOVE_MOUNT_* flags.
  */