diff mbox

[3/4] autofs - make mountpoint checks namespace aware

Message ID 20160914061445.24714.68331.stgit@pluto.themaw.net (mailing list archive)
State New, archived
Headers show

Commit Message

Ian Kent Sept. 14, 2016, 6:14 a.m. UTC
If an automount mount is clone(2)ed into a file system that is
propagation private, when it later expires in the originating
namespace subsequent calls to autofs ->d_automount() for that
dentry in the original namespace will return ELOOP until the
mount is manually umounted in the cloned namespace.

In the same way, if an autofs mount is triggered by automount(8)
running within a container the dentry will be seen as mounted in
the root init namespace and calls to ->d_automount() in that namespace
will return ELOOP until the mount is umounted within the container.

Also, have_submounts() can return an incorect result when a mount
exists in a namespace other than the one being checked.

Signed-off-by: Ian Kent <raven@themaw.net>
Cc: Al Viro <viro@ZenIV.linux.org.uk>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Omar Sandoval <osandov@osandov.com>
---
 fs/autofs4/dev-ioctl.c |    2 +-
 fs/autofs4/expire.c    |    4 ++--
 fs/autofs4/root.c      |   30 +++++++++++++++---------------
 fs/autofs4/waitq.c     |    2 +-
 4 files changed, 19 insertions(+), 19 deletions(-)


--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Eric W. Biederman Sept. 14, 2016, 5:28 p.m. UTC | #1
Ian Kent <raven@themaw.net> writes:

> If an automount mount is clone(2)ed into a file system that is
> propagation private, when it later expires in the originating
> namespace subsequent calls to autofs ->d_automount() for that
> dentry in the original namespace will return ELOOP until the
> mount is manually umounted in the cloned namespace.
>
> In the same way, if an autofs mount is triggered by automount(8)
> running within a container the dentry will be seen as mounted in
> the root init namespace and calls to ->d_automount() in that namespace
> will return ELOOP until the mount is umounted within the container.
>
> Also, have_submounts() can return an incorect result when a mount
> exists in a namespace other than the one being checked.

Overall this appears to be a fairly reasonable set of changes.  It does
increase the expense when an actual mount point is encountered, but if
these are the desired some increase in cost when a dentry is a
mountpoint is unavoidable.

May I ask the motiviation for this set of changes?  Reading through the
changes I don't grasp why we want to change the behavior of autofs.
What problem is being solved?  What are the benefits?

Eric

> Signed-off-by: Ian Kent <raven@themaw.net>
> Cc: Al Viro <viro@ZenIV.linux.org.uk>
> Cc: Eric W. Biederman <ebiederm@xmission.com>
> Cc: Omar Sandoval <osandov@osandov.com>
> ---
>  fs/autofs4/dev-ioctl.c |    2 +-
>  fs/autofs4/expire.c    |    4 ++--
>  fs/autofs4/root.c      |   30 +++++++++++++++---------------
>  fs/autofs4/waitq.c     |    2 +-
>  4 files changed, 19 insertions(+), 19 deletions(-)
>
> diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
> index c7fcc74..0024e25 100644
> --- a/fs/autofs4/dev-ioctl.c
> +++ b/fs/autofs4/dev-ioctl.c
> @@ -564,7 +564,7 @@ static int autofs_dev_ioctl_ismountpoint(struct file *fp,
>  
>  		devid = new_encode_dev(dev);
>  
> -		err = have_submounts(path.dentry);
> +		err = have_local_submounts(path.dentry);
>  
>  		if (follow_down_one(&path))
>  			magic = path.dentry->d_sb->s_magic;
> diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
> index d8e6d42..7cc34ef 100644
> --- a/fs/autofs4/expire.c
> +++ b/fs/autofs4/expire.c
> @@ -236,7 +236,7 @@ static int autofs4_tree_busy(struct vfsmount *mnt,
>  		 * count for the autofs dentry.
>  		 * If the fs is busy update the expiry counter.
>  		 */
> -		if (d_mountpoint(p)) {
> +		if (is_local_mountpoint(p)) {
>  			if (autofs4_mount_busy(mnt, p)) {
>  				top_ino->last_used = jiffies;
>  				dput(p);
> @@ -280,7 +280,7 @@ static struct dentry *autofs4_check_leaves(struct vfsmount *mnt,
>  	while ((p = get_next_positive_dentry(p, parent))) {
>  		pr_debug("dentry %p %pd\n", p, p);
>  
> -		if (d_mountpoint(p)) {
> +		if (is_local_mountpoint(p)) {
>  			/* Can we umount this guy */
>  			if (autofs4_mount_busy(mnt, p))
>  				continue;
> diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
> index fa84bb8..4150ad6 100644
> --- a/fs/autofs4/root.c
> +++ b/fs/autofs4/root.c
> @@ -123,7 +123,7 @@ static int autofs4_dir_open(struct inode *inode, struct file *file)
>  	 * it.
>  	 */
>  	spin_lock(&sbi->lookup_lock);
> -	if (!d_mountpoint(dentry) && simple_empty(dentry)) {
> +	if (!is_local_mountpoint(dentry) && simple_empty(dentry)) {
>  		spin_unlock(&sbi->lookup_lock);
>  		return -ENOENT;
>  	}
> @@ -370,28 +370,28 @@ static struct vfsmount *autofs4_d_automount(struct path *path)
>  
>  	/*
>  	 * If the dentry is a symlink it's equivalent to a directory
> -	 * having d_mountpoint() true, so there's no need to call back
> -	 * to the daemon.
> +	 * having is_local_mountpoint() true, so there's no need to
> +	 * call back to the daemon.
>  	 */
>  	if (d_really_is_positive(dentry) && d_is_symlink(dentry)) {
>  		spin_unlock(&sbi->fs_lock);
>  		goto done;
>  	}
>  
> -	if (!d_mountpoint(dentry)) {
> +	if (!is_local_mountpoint(dentry)) {
>  		/*
>  		 * It's possible that user space hasn't removed directories
>  		 * after umounting a rootless multi-mount, although it
> -		 * should. For v5 have_submounts() is sufficient to handle
> -		 * this because the leaves of the directory tree under the
> -		 * mount never trigger mounts themselves (they have an autofs
> -		 * trigger mount mounted on them). But v4 pseudo direct mounts
> -		 * do need the leaves to trigger mounts. In this case we
> -		 * have no choice but to use the list_empty() check and
> -		 * require user space behave.
> +		 * should. For v5 have_local_submounts() is sufficient to
> +		 * handle this because the leaves of the directory tree under
> +		 * the mount never trigger mounts themselves (they have an
> +		 * autofs trigger mount mounted on them). But v4 pseudo
> +		 * direct mounts do need the leaves to trigger mounts. In
> +		 * this case we have no choice but to use the list_empty()
> +		 * check and require user space behave.
>  		 */
>  		if (sbi->version > 4) {
> -			if (have_submounts(dentry)) {
> +			if (have_local_submounts(dentry)) {
>  				spin_unlock(&sbi->fs_lock);
>  				goto done;
>  			}
> @@ -431,7 +431,7 @@ static int autofs4_d_manage(struct dentry *dentry, bool rcu_walk)
>  
>  	/* The daemon never waits. */
>  	if (autofs4_oz_mode(sbi)) {
> -		if (!d_mountpoint(dentry))
> +		if (!is_local_mountpoint(dentry))
>  			return -EISDIR;
>  		return 0;
>  	}
> @@ -460,7 +460,7 @@ static int autofs4_d_manage(struct dentry *dentry, bool rcu_walk)
>  
>  		if (ino->flags & AUTOFS_INF_WANT_EXPIRE)
>  			return 0;
> -		if (d_mountpoint(dentry))
> +		if (is_local_mountpoint(dentry))
>  			return 0;
>  		inode = d_inode_rcu(dentry);
>  		if (inode && S_ISLNK(inode->i_mode))
> @@ -487,7 +487,7 @@ static int autofs4_d_manage(struct dentry *dentry, bool rcu_walk)
>  		 * we can avoid needless calls ->d_automount() and avoid
>  		 * an incorrect ELOOP error return.
>  		 */
> -		if ((!d_mountpoint(dentry) && !simple_empty(dentry)) ||
> +		if ((!is_local_mountpoint(dentry) && !simple_empty(dentry)) ||
>  		    (d_really_is_positive(dentry) && d_is_symlink(dentry)))
>  			status = -EISDIR;
>  	}
> diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
> index 431fd7e..911f4d5 100644
> --- a/fs/autofs4/waitq.c
> +++ b/fs/autofs4/waitq.c
> @@ -333,7 +333,7 @@ static int validate_request(struct autofs_wait_queue **wait,
>  					dentry = new;
>  			}
>  		}
> -		if (have_submounts(dentry))
> +		if (have_local_submounts(dentry))
>  			valid = 0;
>  
>  		if (new)
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ian Kent Sept. 15, 2016, 12:09 a.m. UTC | #2
On Wed, 2016-09-14 at 12:28 -0500, Eric W. Biederman wrote:
> Ian Kent <raven@themaw.net> writes:
> 
> > If an automount mount is clone(2)ed into a file system that is
> > propagation private, when it later expires in the originating
> > namespace subsequent calls to autofs ->d_automount() for that
> > dentry in the original namespace will return ELOOP until the
> > mount is manually umounted in the cloned namespace.
> > 
> > In the same way, if an autofs mount is triggered by automount(8)
> > running within a container the dentry will be seen as mounted in
> > the root init namespace and calls to ->d_automount() in that namespace
> > will return ELOOP until the mount is umounted within the container.
> > 
> > Also, have_submounts() can return an incorect result when a mount
> > exists in a namespace other than the one being checked.
> 
> Overall this appears to be a fairly reasonable set of changes.  It does
> increase the expense when an actual mount point is encountered, but if
> these are the desired some increase in cost when a dentry is a
> mountpoint is unavoidable.
> 
> May I ask the motiviation for this set of changes?  Reading through the
> changes I don't grasp why we want to change the behavior of autofs.
> What problem is being solved?  What are the benefits?

LOL, it's all too easy for me to give a patch description that I think explains
a problem I need to solve without realizing it isn't clear to others what the
problem is, sorry about that.

For quite a while now, and not that frequently but consistently, I've been
getting reports of people using autofs getting ELOOP errors and not being able
to mount automounts.

This has been due to the cloning of autofs file systems (that have active
automounts at the time of the clone) by other systems.

An unshare, as one example, can easily result in the cloning of an autofs file
system that has active mounts which shows this problem.

Once an active mount that has been cloned is expired in the namespace that
performed the unshare it can't be (auto)mounted again in the the originating
namespace because the mounted check in the autofs module will think it is
already mounted.

I'm not sure this is a clear description either, hopefully it is enough to
demonstrate the type of problem I'm typing to solve.

> Eric
> 
> > Signed-off-by: Ian Kent <raven@themaw.net>
> > Cc: Al Viro <viro@ZenIV.linux.org.uk>
> > Cc: Eric W. Biederman <ebiederm@xmission.com>
> > Cc: Omar Sandoval <osandov@osandov.com>
> > ---
> >  fs/autofs4/dev-ioctl.c |    2 +-
> >  fs/autofs4/expire.c    |    4 ++--
> >  fs/autofs4/root.c      |   30 +++++++++++++++---------------
> >  fs/autofs4/waitq.c     |    2 +-
> >  4 files changed, 19 insertions(+), 19 deletions(-)
> > 
> > diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
> > index c7fcc74..0024e25 100644
> > --- a/fs/autofs4/dev-ioctl.c
> > +++ b/fs/autofs4/dev-ioctl.c
> > @@ -564,7 +564,7 @@ static int autofs_dev_ioctl_ismountpoint(struct file
> > *fp,
> >  
> >  		devid = new_encode_dev(dev);
> >  
> > -		err = have_submounts(path.dentry);
> > +		err = have_local_submounts(path.dentry);
> >  
> >  		if (follow_down_one(&path))
> >  			magic = path.dentry->d_sb->s_magic;
> > diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
> > index d8e6d42..7cc34ef 100644
> > --- a/fs/autofs4/expire.c
> > +++ b/fs/autofs4/expire.c
> > @@ -236,7 +236,7 @@ static int autofs4_tree_busy(struct vfsmount *mnt,
> >  		 * count for the autofs dentry.
> >  		 * If the fs is busy update the expiry counter.
> >  		 */
> > -		if (d_mountpoint(p)) {
> > +		if (is_local_mountpoint(p)) {
> >  			if (autofs4_mount_busy(mnt, p)) {
> >  				top_ino->last_used = jiffies;
> >  				dput(p);
> > @@ -280,7 +280,7 @@ static struct dentry *autofs4_check_leaves(struct
> > vfsmount *mnt,
> >  	while ((p = get_next_positive_dentry(p, parent))) {
> >  		pr_debug("dentry %p %pd\n", p, p);
> >  
> > -		if (d_mountpoint(p)) {
> > +		if (is_local_mountpoint(p)) {
> >  			/* Can we umount this guy */
> >  			if (autofs4_mount_busy(mnt, p))
> >  				continue;
> > diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
> > index fa84bb8..4150ad6 100644
> > --- a/fs/autofs4/root.c
> > +++ b/fs/autofs4/root.c
> > @@ -123,7 +123,7 @@ static int autofs4_dir_open(struct inode *inode, struct
> > file *file)
> >  	 * it.
> >  	 */
> >  	spin_lock(&sbi->lookup_lock);
> > -	if (!d_mountpoint(dentry) && simple_empty(dentry)) {
> > +	if (!is_local_mountpoint(dentry) && simple_empty(dentry)) {
> >  		spin_unlock(&sbi->lookup_lock);
> >  		return -ENOENT;
> >  	}
> > @@ -370,28 +370,28 @@ static struct vfsmount *autofs4_d_automount(struct
> > path *path)
> >  
> >  	/*
> >  	 * If the dentry is a symlink it's equivalent to a directory
> > -	 * having d_mountpoint() true, so there's no need to call back
> > -	 * to the daemon.
> > +	 * having is_local_mountpoint() true, so there's no need to
> > +	 * call back to the daemon.
> >  	 */
> >  	if (d_really_is_positive(dentry) && d_is_symlink(dentry)) {
> >  		spin_unlock(&sbi->fs_lock);
> >  		goto done;
> >  	}
> >  
> > -	if (!d_mountpoint(dentry)) {
> > +	if (!is_local_mountpoint(dentry)) {
> >  		/*
> >  		 * It's possible that user space hasn't removed directories
> >  		 * after umounting a rootless multi-mount, although it
> > -		 * should. For v5 have_submounts() is sufficient to handle
> > -		 * this because the leaves of the directory tree under the
> > -		 * mount never trigger mounts themselves (they have an
> > autofs
> > -		 * trigger mount mounted on them). But v4 pseudo direct
> > mounts
> > -		 * do need the leaves to trigger mounts. In this case we
> > -		 * have no choice but to use the list_empty() check and
> > -		 * require user space behave.
> > +		 * should. For v5 have_local_submounts() is sufficient to
> > +		 * handle this because the leaves of the directory tree
> > under
> > +		 * the mount never trigger mounts themselves (they have an
> > +		 * autofs trigger mount mounted on them). But v4 pseudo
> > +		 * direct mounts do need the leaves to trigger mounts. In
> > +		 * this case we have no choice but to use the list_empty()
> > +		 * check and require user space behave.
> >  		 */
> >  		if (sbi->version > 4) {
> > -			if (have_submounts(dentry)) {
> > +			if (have_local_submounts(dentry)) {
> >  				spin_unlock(&sbi->fs_lock);
> >  				goto done;
> >  			}
> > @@ -431,7 +431,7 @@ static int autofs4_d_manage(struct dentry *dentry, bool
> > rcu_walk)
> >  
> >  	/* The daemon never waits. */
> >  	if (autofs4_oz_mode(sbi)) {
> > -		if (!d_mountpoint(dentry))
> > +		if (!is_local_mountpoint(dentry))
> >  			return -EISDIR;
> >  		return 0;
> >  	}
> > @@ -460,7 +460,7 @@ static int autofs4_d_manage(struct dentry *dentry, bool
> > rcu_walk)
> >  
> >  		if (ino->flags & AUTOFS_INF_WANT_EXPIRE)
> >  			return 0;
> > -		if (d_mountpoint(dentry))
> > +		if (is_local_mountpoint(dentry))
> >  			return 0;
> >  		inode = d_inode_rcu(dentry);
> >  		if (inode && S_ISLNK(inode->i_mode))
> > @@ -487,7 +487,7 @@ static int autofs4_d_manage(struct dentry *dentry, bool
> > rcu_walk)
> >  		 * we can avoid needless calls ->d_automount() and avoid
> >  		 * an incorrect ELOOP error return.
> >  		 */
> > -		if ((!d_mountpoint(dentry) && !simple_empty(dentry)) ||
> > +		if ((!is_local_mountpoint(dentry) && !simple_empty(dentry))
> > ||
> >  		    (d_really_is_positive(dentry) && d_is_symlink(dentry)))
> >  			status = -EISDIR;
> >  	}
> > diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
> > index 431fd7e..911f4d5 100644
> > --- a/fs/autofs4/waitq.c
> > +++ b/fs/autofs4/waitq.c
> > @@ -333,7 +333,7 @@ static int validate_request(struct autofs_wait_queue
> > **wait,
> >  					dentry = new;
> >  			}
> >  		}
> > -		if (have_submounts(dentry))
> > +		if (have_local_submounts(dentry))
> >  			valid = 0;
> >  
> >  		if (new)
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Omar Sandoval Sept. 15, 2016, 12:32 a.m. UTC | #3
On Thu, Sep 15, 2016 at 08:09:23AM +0800, Ian Kent wrote:
> On Wed, 2016-09-14 at 12:28 -0500, Eric W. Biederman wrote:
> > Ian Kent <raven@themaw.net> writes:
> > 
> > > If an automount mount is clone(2)ed into a file system that is
> > > propagation private, when it later expires in the originating
> > > namespace subsequent calls to autofs ->d_automount() for that
> > > dentry in the original namespace will return ELOOP until the
> > > mount is manually umounted in the cloned namespace.
> > > 
> > > In the same way, if an autofs mount is triggered by automount(8)
> > > running within a container the dentry will be seen as mounted in
> > > the root init namespace and calls to ->d_automount() in that namespace
> > > will return ELOOP until the mount is umounted within the container.
> > > 
> > > Also, have_submounts() can return an incorect result when a mount
> > > exists in a namespace other than the one being checked.
> > 
> > Overall this appears to be a fairly reasonable set of changes.  It does
> > increase the expense when an actual mount point is encountered, but if
> > these are the desired some increase in cost when a dentry is a
> > mountpoint is unavoidable.
> > 
> > May I ask the motiviation for this set of changes?  Reading through the
> > changes I don't grasp why we want to change the behavior of autofs.
> > What problem is being solved?  What are the benefits?
> 
> LOL, it's all too easy for me to give a patch description that I think explains
> a problem I need to solve without realizing it isn't clear to others what the
> problem is, sorry about that.
> 
> For quite a while now, and not that frequently but consistently, I've been
> getting reports of people using autofs getting ELOOP errors and not being able
> to mount automounts.
> 
> This has been due to the cloning of autofs file systems (that have active
> automounts at the time of the clone) by other systems.
> 
> An unshare, as one example, can easily result in the cloning of an autofs file
> system that has active mounts which shows this problem.
> 
> Once an active mount that has been cloned is expired in the namespace that
> performed the unshare it can't be (auto)mounted again in the the originating
> namespace because the mounted check in the autofs module will think it is
> already mounted.
> 
> I'm not sure this is a clear description either, hopefully it is enough to
> demonstrate the type of problem I'm typing to solve.

Yup, at Facebook we've been hitting this issue for years. Our container
setup doesn't clean up the base system's mounts after the
unshare(CLONE_NEWNS) and before the chroot(), so we very frequently see
the base system's autofs mounts get broken with ELOOP. The solution
there might be to fix our container setup, but I think it's still a
kernel bug, as it breaks the isolation between namespaces.

Ian, I'm going to test these patches, thanks for sending them out.
Ian Kent Sept. 15, 2016, 1:03 a.m. UTC | #4
On Wed, 2016-09-14 at 12:28 -0500, Eric W. Biederman wrote:
> Ian Kent <raven@themaw.net> writes:
> 
> > If an automount mount is clone(2)ed into a file system that is
> > propagation private, when it later expires in the originating
> > namespace subsequent calls to autofs ->d_automount() for that
> > dentry in the original namespace will return ELOOP until the
> > mount is manually umounted in the cloned namespace.
> > 
> > In the same way, if an autofs mount is triggered by automount(8)
> > running within a container the dentry will be seen as mounted in
> > the root init namespace and calls to ->d_automount() in that namespace
> > will return ELOOP until the mount is umounted within the container.
> > 
> > Also, have_submounts() can return an incorect result when a mount
> > exists in a namespace other than the one being checked.
> 
> Overall this appears to be a fairly reasonable set of changes.  It does
> increase the expense when an actual mount point is encountered, but if
> these are the desired some increase in cost when a dentry is a
> mountpoint is unavoidable.

The possibility of a significant increase in overhead with this change for
autofs is one reason I've held back on posting the change for a long time.

If there are many instances of a mount (ie. thousands) I think the mnt_namespace
mount list could become large enough to be a problem. So that list might
eventually need to be a hashed list instead of a linear list.

But this would likely also be a problem in areas other than just autofs.

> 
> May I ask the motiviation for this set of changes?  Reading through the
> changes I don't grasp why we want to change the behavior of autofs.
> What problem is being solved?  What are the benefits?
> 
> Eric
> 
> > Signed-off-by: Ian Kent <raven@themaw.net>
> > Cc: Al Viro <viro@ZenIV.linux.org.uk>
> > Cc: Eric W. Biederman <ebiederm@xmission.com>
> > Cc: Omar Sandoval <osandov@osandov.com>
> > ---
> >  fs/autofs4/dev-ioctl.c |    2 +-
> >  fs/autofs4/expire.c    |    4 ++--
> >  fs/autofs4/root.c      |   30 +++++++++++++++---------------
> >  fs/autofs4/waitq.c     |    2 +-
> >  4 files changed, 19 insertions(+), 19 deletions(-)
> > 
> > diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
> > index c7fcc74..0024e25 100644
> > --- a/fs/autofs4/dev-ioctl.c
> > +++ b/fs/autofs4/dev-ioctl.c
> > @@ -564,7 +564,7 @@ static int autofs_dev_ioctl_ismountpoint(struct file
> > *fp,
> >  
> >  		devid = new_encode_dev(dev);
> >  
> > -		err = have_submounts(path.dentry);
> > +		err = have_local_submounts(path.dentry);
> >  
> >  		if (follow_down_one(&path))
> >  			magic = path.dentry->d_sb->s_magic;
> > diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
> > index d8e6d42..7cc34ef 100644
> > --- a/fs/autofs4/expire.c
> > +++ b/fs/autofs4/expire.c
> > @@ -236,7 +236,7 @@ static int autofs4_tree_busy(struct vfsmount *mnt,
> >  		 * count for the autofs dentry.
> >  		 * If the fs is busy update the expiry counter.
> >  		 */
> > -		if (d_mountpoint(p)) {
> > +		if (is_local_mountpoint(p)) {
> >  			if (autofs4_mount_busy(mnt, p)) {
> >  				top_ino->last_used = jiffies;
> >  				dput(p);
> > @@ -280,7 +280,7 @@ static struct dentry *autofs4_check_leaves(struct
> > vfsmount *mnt,
> >  	while ((p = get_next_positive_dentry(p, parent))) {
> >  		pr_debug("dentry %p %pd\n", p, p);
> >  
> > -		if (d_mountpoint(p)) {
> > +		if (is_local_mountpoint(p)) {
> >  			/* Can we umount this guy */
> >  			if (autofs4_mount_busy(mnt, p))
> >  				continue;
> > diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
> > index fa84bb8..4150ad6 100644
> > --- a/fs/autofs4/root.c
> > +++ b/fs/autofs4/root.c
> > @@ -123,7 +123,7 @@ static int autofs4_dir_open(struct inode *inode, struct
> > file *file)
> >  	 * it.
> >  	 */
> >  	spin_lock(&sbi->lookup_lock);
> > -	if (!d_mountpoint(dentry) && simple_empty(dentry)) {
> > +	if (!is_local_mountpoint(dentry) && simple_empty(dentry)) {
> >  		spin_unlock(&sbi->lookup_lock);
> >  		return -ENOENT;
> >  	}
> > @@ -370,28 +370,28 @@ static struct vfsmount *autofs4_d_automount(struct
> > path *path)
> >  
> >  	/*
> >  	 * If the dentry is a symlink it's equivalent to a directory
> > -	 * having d_mountpoint() true, so there's no need to call back
> > -	 * to the daemon.
> > +	 * having is_local_mountpoint() true, so there's no need to
> > +	 * call back to the daemon.
> >  	 */
> >  	if (d_really_is_positive(dentry) && d_is_symlink(dentry)) {
> >  		spin_unlock(&sbi->fs_lock);
> >  		goto done;
> >  	}
> >  
> > -	if (!d_mountpoint(dentry)) {
> > +	if (!is_local_mountpoint(dentry)) {
> >  		/*
> >  		 * It's possible that user space hasn't removed directories
> >  		 * after umounting a rootless multi-mount, although it
> > -		 * should. For v5 have_submounts() is sufficient to handle
> > -		 * this because the leaves of the directory tree under the
> > -		 * mount never trigger mounts themselves (they have an
> > autofs
> > -		 * trigger mount mounted on them). But v4 pseudo direct
> > mounts
> > -		 * do need the leaves to trigger mounts. In this case we
> > -		 * have no choice but to use the list_empty() check and
> > -		 * require user space behave.
> > +		 * should. For v5 have_local_submounts() is sufficient to
> > +		 * handle this because the leaves of the directory tree
> > under
> > +		 * the mount never trigger mounts themselves (they have an
> > +		 * autofs trigger mount mounted on them). But v4 pseudo
> > +		 * direct mounts do need the leaves to trigger mounts. In
> > +		 * this case we have no choice but to use the list_empty()
> > +		 * check and require user space behave.
> >  		 */
> >  		if (sbi->version > 4) {
> > -			if (have_submounts(dentry)) {
> > +			if (have_local_submounts(dentry)) {
> >  				spin_unlock(&sbi->fs_lock);
> >  				goto done;
> >  			}
> > @@ -431,7 +431,7 @@ static int autofs4_d_manage(struct dentry *dentry, bool
> > rcu_walk)
> >  
> >  	/* The daemon never waits. */
> >  	if (autofs4_oz_mode(sbi)) {
> > -		if (!d_mountpoint(dentry))
> > +		if (!is_local_mountpoint(dentry))
> >  			return -EISDIR;
> >  		return 0;
> >  	}
> > @@ -460,7 +460,7 @@ static int autofs4_d_manage(struct dentry *dentry, bool
> > rcu_walk)
> >  
> >  		if (ino->flags & AUTOFS_INF_WANT_EXPIRE)
> >  			return 0;
> > -		if (d_mountpoint(dentry))
> > +		if (is_local_mountpoint(dentry))
> >  			return 0;
> >  		inode = d_inode_rcu(dentry);
> >  		if (inode && S_ISLNK(inode->i_mode))
> > @@ -487,7 +487,7 @@ static int autofs4_d_manage(struct dentry *dentry, bool
> > rcu_walk)
> >  		 * we can avoid needless calls ->d_automount() and avoid
> >  		 * an incorrect ELOOP error return.
> >  		 */
> > -		if ((!d_mountpoint(dentry) && !simple_empty(dentry)) ||
> > +		if ((!is_local_mountpoint(dentry) && !simple_empty(dentry))
> > ||
> >  		    (d_really_is_positive(dentry) && d_is_symlink(dentry)))
> >  			status = -EISDIR;
> >  	}
> > diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
> > index 431fd7e..911f4d5 100644
> > --- a/fs/autofs4/waitq.c
> > +++ b/fs/autofs4/waitq.c
> > @@ -333,7 +333,7 @@ static int validate_request(struct autofs_wait_queue
> > **wait,
> >  					dentry = new;
> >  			}
> >  		}
> > -		if (have_submounts(dentry))
> > +		if (have_local_submounts(dentry))
> >  			valid = 0;
> >  
> >  		if (new)
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric W. Biederman Sept. 15, 2016, 2:08 a.m. UTC | #5
Ian Kent <raven@themaw.net> writes:

> On Wed, 2016-09-14 at 12:28 -0500, Eric W. Biederman wrote:
>> Ian Kent <raven@themaw.net> writes:
>> 
>> > If an automount mount is clone(2)ed into a file system that is
>> > propagation private, when it later expires in the originating
>> > namespace subsequent calls to autofs ->d_automount() for that
>> > dentry in the original namespace will return ELOOP until the
>> > mount is manually umounted in the cloned namespace.
>> > 
>> > In the same way, if an autofs mount is triggered by automount(8)
>> > running within a container the dentry will be seen as mounted in
>> > the root init namespace and calls to ->d_automount() in that namespace
>> > will return ELOOP until the mount is umounted within the container.
>> > 
>> > Also, have_submounts() can return an incorect result when a mount
>> > exists in a namespace other than the one being checked.
>> 
>> Overall this appears to be a fairly reasonable set of changes.  It does
>> increase the expense when an actual mount point is encountered, but if
>> these are the desired some increase in cost when a dentry is a
>> mountpoint is unavoidable.
>> 
>> May I ask the motiviation for this set of changes?  Reading through the
>> changes I don't grasp why we want to change the behavior of autofs.
>> What problem is being solved?  What are the benefits?
>
> LOL, it's all too easy for me to give a patch description that I think explains
> a problem I need to solve without realizing it isn't clear to others what the
> problem is, sorry about that.
>
> For quite a while now, and not that frequently but consistently, I've been
> getting reports of people using autofs getting ELOOP errors and not being able
> to mount automounts.
>
> This has been due to the cloning of autofs file systems (that have active
> automounts at the time of the clone) by other systems.
>
> An unshare, as one example, can easily result in the cloning of an autofs file
> system that has active mounts which shows this problem.
>
> Once an active mount that has been cloned is expired in the namespace that
> performed the unshare it can't be (auto)mounted again in the the originating
> namespace because the mounted check in the autofs module will think it is
> already mounted.
>
> I'm not sure this is a clear description either, hopefully it is enough to
> demonstrate the type of problem I'm typing to solve.

So to rephrase the problem is that an autofs instance can stop working
properly from the perspective of the mount namespace it is mounted in
if the autofs instance is shared between multiple mount namespaces.  The
problem is that mounts and unmounts do not always propogate between
mount namespaces.  This lack of symmetric mount/unmount behavior
leads to mountpoints that become unusable.

Which leads to the question what is the expected new behavior with your
patchset applied.  New mounts can be added in the parent mount namespace
(because the test is local).  Does your change also allow the
autofs mountpoints to be used in the other mount namespaces that share
the autofs instance if everything becomes unmounted?

Or is it expected that other mount namespaces that share an autofs
instance will get changes in their mounts via mount propagation and if
mount propagation is insufficient they are on their own.

I believe this is a question of how do notifications of the desire for
an automount work after your change, and are those notifications
consistent with your desired and/or expected behavior.

Eric





--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ian Kent Sept. 15, 2016, 4:12 a.m. UTC | #6
On Wed, 2016-09-14 at 21:08 -0500, Eric W. Biederman wrote:
> Ian Kent <raven@themaw.net> writes:
> 
> > On Wed, 2016-09-14 at 12:28 -0500, Eric W. Biederman wrote:
> > > Ian Kent <raven@themaw.net> writes:
> > > 
> > > > If an automount mount is clone(2)ed into a file system that is
> > > > propagation private, when it later expires in the originating
> > > > namespace subsequent calls to autofs ->d_automount() for that
> > > > dentry in the original namespace will return ELOOP until the
> > > > mount is manually umounted in the cloned namespace.
> > > > 
> > > > In the same way, if an autofs mount is triggered by automount(8)
> > > > running within a container the dentry will be seen as mounted in
> > > > the root init namespace and calls to ->d_automount() in that namespace
> > > > will return ELOOP until the mount is umounted within the container.
> > > > 
> > > > Also, have_submounts() can return an incorect result when a mount
> > > > exists in a namespace other than the one being checked.
> > > 
> > > Overall this appears to be a fairly reasonable set of changes.  It does
> > > increase the expense when an actual mount point is encountered, but if
> > > these are the desired some increase in cost when a dentry is a
> > > mountpoint is unavoidable.
> > > 
> > > May I ask the motiviation for this set of changes?  Reading through the
> > > changes I don't grasp why we want to change the behavior of autofs.
> > > What problem is being solved?  What are the benefits?
> > 
> > LOL, it's all too easy for me to give a patch description that I think
> > explains
> > a problem I need to solve without realizing it isn't clear to others what
> > the
> > problem is, sorry about that.
> > 
> > For quite a while now, and not that frequently but consistently, I've been
> > getting reports of people using autofs getting ELOOP errors and not being
> > able
> > to mount automounts.
> > 
> > This has been due to the cloning of autofs file systems (that have active
> > automounts at the time of the clone) by other systems.
> > 
> > An unshare, as one example, can easily result in the cloning of an autofs
> > file
> > system that has active mounts which shows this problem.
> > 
> > Once an active mount that has been cloned is expired in the namespace that
> > performed the unshare it can't be (auto)mounted again in the the originating
> > namespace because the mounted check in the autofs module will think it is
> > already mounted.
> > 
> > I'm not sure this is a clear description either, hopefully it is enough to
> > demonstrate the type of problem I'm typing to solve.
> 
> So to rephrase the problem is that an autofs instance can stop working
> properly from the perspective of the mount namespace it is mounted in
> if the autofs instance is shared between multiple mount namespaces.  The
> problem is that mounts and unmounts do not always propogate between
> mount namespaces.  This lack of symmetric mount/unmount behavior
> leads to mountpoints that become unusable.

That's right.

It's also worth considering that symmetric mount propagation is usually not the
behaviour needed either and things like LXC and Docker are set propagation slave
because of problems caused by propagation back to the parent namespace.

So a mount can be triggered within a container, mounted by the automount daemon
in the parent namespace, and propagated to the child and similarly for expires,
which is the common use case now.

> 
> Which leads to the question what is the expected new behavior with your
> patchset applied.  New mounts can be added in the parent mount namespace
> (because the test is local).  Does your change also allow the
> autofs mountpoints to be used in the other mount namespaces that share
> the autofs instance if everything becomes unmounted?

The problem occurs when the subordinate namespace doesn't deal with these
propagated mounts properly, although they can obviously be used by the
subordinate namespace.

> 
> Or is it expected that other mount namespaces that share an autofs
> instance will get changes in their mounts via mount propagation and if
> mount propagation is insufficient they are on their own.

Namespaces that receive updates via mount propagation from a parent will
continue to function as they do now.

Mounts that don't get updates via mount propagation will retain the mount to use
if they need to, as they would without this change, but the originating
namespace will also continue to function as expected.

The child namespace needs cleanup its mounts on exit, which it had to do prior
to this change also.

> 
> I believe this is a question of how do notifications of the desire for
> an automount work after your change, and are those notifications
> consistent with your desired and/or expected behavior.

It sounds like you might be assuming the service receiving these cloned mounts
actually wants to use them or is expecting them to behave like automount mounts.
But that's not what I've seen and is not the way these cloned mounts behave
without the change.

However, as has probably occurred to you by now, there is a semantic change with
this for namespaces that don't receive mount propogation.

If a mount request is triggered by an access in the subordinate namespace for a
dentry that is already mounted in the parent namespace it will silently fail (in
that a mount won't appear in the subordinate namespace) rather than getting an
ELOOP error as it would now.

It's also the case that, if such a mount isn't already mounted, it will cause a
mount to occur in the parent namespace. But that is also the way it is without
the change.

TBH I don't know yet how to resolve that, ideally the cloned mounts would not
appear in the subordinate namespace upon creation but that's also not currently
possible to do and even if it was it would mean quite a change in to the way
things behave now. 

All in all I believe the change here solves a problem that needs to be solved
without affecting normal usage at the expense of a small behaviour change to
cases where automount isn't providing a mounting service.

> 
> Eric
> 
> 
> 
> 
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ian Kent Sept. 15, 2016, 8:19 a.m. UTC | #7
On Thu, 2016-09-15 at 12:12 +0800, Ian Kent wrote:
> On Wed, 2016-09-14 at 21:08 -0500, Eric W. Biederman wrote:
> > Ian Kent <raven@themaw.net> writes:
> > 
> > > On Wed, 2016-09-14 at 12:28 -0500, Eric W. Biederman wrote:
> > > > Ian Kent <raven@themaw.net> writes:
> > > > 
> > > > > If an automount mount is clone(2)ed into a file system that is
> > > > > propagation private, when it later expires in the originating
> > > > > namespace subsequent calls to autofs ->d_automount() for that
> > > > > dentry in the original namespace will return ELOOP until the
> > > > > mount is manually umounted in the cloned namespace.
> > > > > 
> > > > > In the same way, if an autofs mount is triggered by automount(8)
> > > > > running within a container the dentry will be seen as mounted in
> > > > > the root init namespace and calls to ->d_automount() in that namespace
> > > > > will return ELOOP until the mount is umounted within the container.
> > > > > 
> > > > > Also, have_submounts() can return an incorect result when a mount
> > > > > exists in a namespace other than the one being checked.
> > > > 
> > > > Overall this appears to be a fairly reasonable set of changes.  It does
> > > > increase the expense when an actual mount point is encountered, but if
> > > > these are the desired some increase in cost when a dentry is a
> > > > mountpoint is unavoidable.
> > > > 
> > > > May I ask the motiviation for this set of changes?  Reading through the
> > > > changes I don't grasp why we want to change the behavior of autofs.
> > > > What problem is being solved?  What are the benefits?
> > > 
> > > LOL, it's all too easy for me to give a patch description that I think
> > > explains
> > > a problem I need to solve without realizing it isn't clear to others what
> > > the
> > > problem is, sorry about that.
> > > 
> > > For quite a while now, and not that frequently but consistently, I've been
> > > getting reports of people using autofs getting ELOOP errors and not being
> > > able
> > > to mount automounts.
> > > 
> > > This has been due to the cloning of autofs file systems (that have active
> > > automounts at the time of the clone) by other systems.
> > > 
> > > An unshare, as one example, can easily result in the cloning of an autofs
> > > file
> > > system that has active mounts which shows this problem.
> > > 
> > > Once an active mount that has been cloned is expired in the namespace that
> > > performed the unshare it can't be (auto)mounted again in the the
> > > originating
> > > namespace because the mounted check in the autofs module will think it is
> > > already mounted.
> > > 
> > > I'm not sure this is a clear description either, hopefully it is enough to
> > > demonstrate the type of problem I'm typing to solve.
> > 
> > So to rephrase the problem is that an autofs instance can stop working
> > properly from the perspective of the mount namespace it is mounted in
> > if the autofs instance is shared between multiple mount namespaces.  The
> > problem is that mounts and unmounts do not always propogate between
> > mount namespaces.  This lack of symmetric mount/unmount behavior
> > leads to mountpoints that become unusable.
> 
> That's right.
> 
> It's also worth considering that symmetric mount propagation is usually not
> the
> behaviour needed either and things like LXC and Docker are set propagation
> slave
> because of problems caused by propagation back to the parent namespace.
> 
> So a mount can be triggered within a container, mounted by the automount
> daemon
> in the parent namespace, and propagated to the child and similarly for
> expires,
> which is the common use case now.
> 
> > 
> > Which leads to the question what is the expected new behavior with your
> > patchset applied.  New mounts can be added in the parent mount namespace
> > (because the test is local).  Does your change also allow the
> > autofs mountpoints to be used in the other mount namespaces that share
> > the autofs instance if everything becomes unmounted?
> 
> The problem occurs when the subordinate namespace doesn't deal with these
> propagated mounts properly, although they can obviously be used by the
> subordinate namespace.
> 
> > 
> > Or is it expected that other mount namespaces that share an autofs
> > instance will get changes in their mounts via mount propagation and if
> > mount propagation is insufficient they are on their own.
> 
> Namespaces that receive updates via mount propagation from a parent will
> continue to function as they do now.
> 
> Mounts that don't get updates via mount propagation will retain the mount to
> use
> if they need to, as they would without this change, but the originating
> namespace will also continue to function as expected.
> 
> The child namespace needs cleanup its mounts on exit, which it had to do prior
> to this change also.
> 
> > 
> > I believe this is a question of how do notifications of the desire for
> > an automount work after your change, and are those notifications
> > consistent with your desired and/or expected behavior.
> 
> It sounds like you might be assuming the service receiving these cloned mounts
> actually wants to use them or is expecting them to behave like automount
> mounts.
> But that's not what I've seen and is not the way these cloned mounts behave
> without the change.
> 
> However, as has probably occurred to you by now, there is a semantic change
> with
> this for namespaces that don't receive mount propogation.
> 
> If a mount request is triggered by an access in the subordinate namespace for
> a
> dentry that is already mounted in the parent namespace it will silently fail
> (in
> that a mount won't appear in the subordinate namespace) rather than getting an
> ELOOP error as it would now.

My mistake, sorry, looking at this again this case will still fail with ELOOP as
it does now.

Ian
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric W. Biederman Sept. 16, 2016, 12:47 a.m. UTC | #8
Ian Kent <raven@themaw.net> writes:

> On Wed, 2016-09-14 at 21:08 -0500, Eric W. Biederman wrote:
>> Ian Kent <raven@themaw.net> writes:
>> 
>> > On Wed, 2016-09-14 at 12:28 -0500, Eric W. Biederman wrote:
>> > > Ian Kent <raven@themaw.net> writes:
>> > > 
>> > > > If an automount mount is clone(2)ed into a file system that is
>> > > > propagation private, when it later expires in the originating
>> > > > namespace subsequent calls to autofs ->d_automount() for that
>> > > > dentry in the original namespace will return ELOOP until the
>> > > > mount is manually umounted in the cloned namespace.
>> > > > 
>> > > > In the same way, if an autofs mount is triggered by automount(8)
>> > > > running within a container the dentry will be seen as mounted in
>> > > > the root init namespace and calls to ->d_automount() in that namespace
>> > > > will return ELOOP until the mount is umounted within the container.
>> > > > 
>> > > > Also, have_submounts() can return an incorect result when a mount
>> > > > exists in a namespace other than the one being checked.
>> > > 
>> > > Overall this appears to be a fairly reasonable set of changes.  It does
>> > > increase the expense when an actual mount point is encountered, but if
>> > > these are the desired some increase in cost when a dentry is a
>> > > mountpoint is unavoidable.
>> > > 
>> > > May I ask the motiviation for this set of changes?  Reading through the
>> > > changes I don't grasp why we want to change the behavior of autofs.
>> > > What problem is being solved?  What are the benefits?
>> > 
>> > LOL, it's all too easy for me to give a patch description that I think
>> > explains
>> > a problem I need to solve without realizing it isn't clear to others what
>> > the
>> > problem is, sorry about that.
>> > 
>> > For quite a while now, and not that frequently but consistently, I've been
>> > getting reports of people using autofs getting ELOOP errors and not being
>> > able
>> > to mount automounts.
>> > 
>> > This has been due to the cloning of autofs file systems (that have active
>> > automounts at the time of the clone) by other systems.
>> > 
>> > An unshare, as one example, can easily result in the cloning of an autofs
>> > file
>> > system that has active mounts which shows this problem.
>> > 
>> > Once an active mount that has been cloned is expired in the namespace that
>> > performed the unshare it can't be (auto)mounted again in the the originating
>> > namespace because the mounted check in the autofs module will think it is
>> > already mounted.
>> > 
>> > I'm not sure this is a clear description either, hopefully it is enough to
>> > demonstrate the type of problem I'm typing to solve.
>> 
>> So to rephrase the problem is that an autofs instance can stop working
>> properly from the perspective of the mount namespace it is mounted in
>> if the autofs instance is shared between multiple mount namespaces.  The
>> problem is that mounts and unmounts do not always propogate between
>> mount namespaces.  This lack of symmetric mount/unmount behavior
>> leads to mountpoints that become unusable.
>
> That's right.
>
> It's also worth considering that symmetric mount propagation is usually not the
> behaviour needed either and things like LXC and Docker are set propagation slave
> because of problems caused by propagation back to the parent namespace.
>
> So a mount can be triggered within a container, mounted by the automount daemon
> in the parent namespace, and propagated to the child and similarly for expires,
> which is the common use case now.
>
>> 
>> Which leads to the question what is the expected new behavior with your
>> patchset applied.  New mounts can be added in the parent mount namespace
>> (because the test is local).  Does your change also allow the
>> autofs mountpoints to be used in the other mount namespaces that share
>> the autofs instance if everything becomes unmounted?
>
> The problem occurs when the subordinate namespace doesn't deal with these
> propagated mounts properly, although they can obviously be used by the
> subordinate namespace.
>
>> 
>> Or is it expected that other mount namespaces that share an autofs
>> instance will get changes in their mounts via mount propagation and if
>> mount propagation is insufficient they are on their own.
>
> Namespaces that receive updates via mount propagation from a parent will
> continue to function as they do now.
>
> Mounts that don't get updates via mount propagation will retain the mount to use
> if they need to, as they would without this change, but the originating
> namespace will also continue to function as expected.
>
> The child namespace needs cleanup its mounts on exit, which it had to do prior
> to this change also.
>
>> 
>> I believe this is a question of how do notifications of the desire for
>> an automount work after your change, and are those notifications
>> consistent with your desired and/or expected behavior.
>
> It sounds like you might be assuming the service receiving these cloned mounts
> actually wants to use them or is expecting them to behave like automount mounts.
> But that's not what I've seen and is not the way these cloned mounts behave
> without the change.
>
> However, as has probably occurred to you by now, there is a semantic change with
> this for namespaces that don't receive mount propogation.
>
> If a mount request is triggered by an access in the subordinate namespace for a
> dentry that is already mounted in the parent namespace it will silently fail (in
> that a mount won't appear in the subordinate namespace) rather than getting an
> ELOOP error as it would now.
>
> It's also the case that, if such a mount isn't already mounted, it will cause a
> mount to occur in the parent namespace. But that is also the way it is without
> the change.
>
> TBH I don't know yet how to resolve that, ideally the cloned mounts would not
> appear in the subordinate namespace upon creation but that's also not currently
> possible to do and even if it was it would mean quite a change in to the way
> things behave now.
>
> All in all I believe the change here solves a problem that needs to be solved
> without affecting normal usage at the expense of a small behaviour change to
> cases where automount isn't providing a mounting service.

That sounds like a reasonable semantic change.  Limiting the responses
of the autofs mount path to what is present in the mount namespace
of the program that actually performs the autofs mounts seems needed.

In fact the entire local mount concept exists because I was solving a
very similar problem for rename, unlink and rmdir.  Where a cloned mount
namespace could cause a denial of service attack on the original
mount namespace.

I don't know if this change makes sense for mount expiry.

Unless I am misreading something when a mount namespace is cloned the
new mounts are put into the same expiry group as the old mounts.
Furthermore the triggers for mounts are based on the filesystem.


I can think of 3 ways to use mount namespaces that are relevant
to this discussion.

- Symmetric mount propagation where everything is identical except
  for specific mounts such as /tmp.

- Slave mount propagation where all of the mounts are created in
  the parent and propgated to the slave, except for specific exceptions.

- Disabled mount propagation.  Where updates are simply not received
  by the namespace.  The mount namespace is expected to change in
  ways that are completely independent of the parent (and this breaks
  autofs).

In the first two cases the desire is to have the same set of mounts
except for specific exceptions so it is generally desirable.  So having
someone using a mount in another mount namespace seems like a good
reason not to expire the mount.

Furthermore since the processes can always trigger or hang onto the
mounts without using mount namespaces I don't think those cases add
anything new to the set of problems.

It seems to me the real problem is when something is unmounted in the
original mount namespace and not in the slaves which causes the mount
calls to fail and cause all kinds of havoc.

Unless you can see an error in my reasoning I think the local mount
tests should be limited to just the mount path.  That is sufficient to
keep autofs working as expected while still respecting non-problem users
in other mount namespaces.

Eric
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ian Kent Sept. 16, 2016, 2:58 a.m. UTC | #9
On Thu, 2016-09-15 at 19:47 -0500, Eric W. Biederman wrote:
> Ian Kent <raven@themaw.net> writes:
> 
> > On Wed, 2016-09-14 at 21:08 -0500, Eric W. Biederman wrote:
> > > Ian Kent <raven@themaw.net> writes:
> > > 
> > > > On Wed, 2016-09-14 at 12:28 -0500, Eric W. Biederman wrote:
> > > > > Ian Kent <raven@themaw.net> writes:
> > > > > 
> > > > > > If an automount mount is clone(2)ed into a file system that is
> > > > > > propagation private, when it later expires in the originating
> > > > > > namespace subsequent calls to autofs ->d_automount() for that
> > > > > > dentry in the original namespace will return ELOOP until the
> > > > > > mount is manually umounted in the cloned namespace.
> > > > > > 
> > > > > > In the same way, if an autofs mount is triggered by automount(8)
> > > > > > running within a container the dentry will be seen as mounted in
> > > > > > the root init namespace and calls to ->d_automount() in that
> > > > > > namespace
> > > > > > will return ELOOP until the mount is umounted within the container.
> > > > > > 
> > > > > > Also, have_submounts() can return an incorect result when a mount
> > > > > > exists in a namespace other than the one being checked.
> > > > > 
> > > > > Overall this appears to be a fairly reasonable set of changes.  It
> > > > > does
> > > > > increase the expense when an actual mount point is encountered, but if
> > > > > these are the desired some increase in cost when a dentry is a
> > > > > mountpoint is unavoidable.
> > > > > 
> > > > > May I ask the motiviation for this set of changes?  Reading through
> > > > > the
> > > > > changes I don't grasp why we want to change the behavior of autofs.
> > > > > What problem is being solved?  What are the benefits?
> > > > 
> > > > LOL, it's all too easy for me to give a patch description that I think
> > > > explains
> > > > a problem I need to solve without realizing it isn't clear to others
> > > > what
> > > > the
> > > > problem is, sorry about that.
> > > > 
> > > > For quite a while now, and not that frequently but consistently, I've
> > > > been
> > > > getting reports of people using autofs getting ELOOP errors and not
> > > > being
> > > > able
> > > > to mount automounts.
> > > > 
> > > > This has been due to the cloning of autofs file systems (that have
> > > > active
> > > > automounts at the time of the clone) by other systems.
> > > > 
> > > > An unshare, as one example, can easily result in the cloning of an
> > > > autofs
> > > > file
> > > > system that has active mounts which shows this problem.
> > > > 
> > > > Once an active mount that has been cloned is expired in the namespace
> > > > that
> > > > performed the unshare it can't be (auto)mounted again in the the
> > > > originating
> > > > namespace because the mounted check in the autofs module will think it
> > > > is
> > > > already mounted.
> > > > 
> > > > I'm not sure this is a clear description either, hopefully it is enough
> > > > to
> > > > demonstrate the type of problem I'm typing to solve.
> > > 
> > > So to rephrase the problem is that an autofs instance can stop working
> > > properly from the perspective of the mount namespace it is mounted in
> > > if the autofs instance is shared between multiple mount namespaces.  The
> > > problem is that mounts and unmounts do not always propogate between
> > > mount namespaces.  This lack of symmetric mount/unmount behavior
> > > leads to mountpoints that become unusable.
> > 
> > That's right.
> > 
> > It's also worth considering that symmetric mount propagation is usually not
> > the
> > behaviour needed either and things like LXC and Docker are set propagation
> > slave
> > because of problems caused by propagation back to the parent namespace.
> > 
> > So a mount can be triggered within a container, mounted by the automount
> > daemon
> > in the parent namespace, and propagated to the child and similarly for
> > expires,
> > which is the common use case now.
> > 
> > > 
> > > Which leads to the question what is the expected new behavior with your
> > > patchset applied.  New mounts can be added in the parent mount namespace
> > > (because the test is local).  Does your change also allow the
> > > autofs mountpoints to be used in the other mount namespaces that share
> > > the autofs instance if everything becomes unmounted?
> > 
> > The problem occurs when the subordinate namespace doesn't deal with these
> > propagated mounts properly, although they can obviously be used by the
> > subordinate namespace.
> > 
> > > 
> > > Or is it expected that other mount namespaces that share an autofs
> > > instance will get changes in their mounts via mount propagation and if
> > > mount propagation is insufficient they are on their own.
> > 
> > Namespaces that receive updates via mount propagation from a parent will
> > continue to function as they do now.
> > 
> > Mounts that don't get updates via mount propagation will retain the mount to
> > use
> > if they need to, as they would without this change, but the originating
> > namespace will also continue to function as expected.
> > 
> > The child namespace needs cleanup its mounts on exit, which it had to do
> > prior
> > to this change also.
> > 
> > > 
> > > I believe this is a question of how do notifications of the desire for
> > > an automount work after your change, and are those notifications
> > > consistent with your desired and/or expected behavior.
> > 
> > It sounds like you might be assuming the service receiving these cloned
> > mounts
> > actually wants to use them or is expecting them to behave like automount
> > mounts.
> > But that's not what I've seen and is not the way these cloned mounts behave
> > without the change.
> > 
> > However, as has probably occurred to you by now, there is a semantic change
> > with
> > this for namespaces that don't receive mount propogation.
> > 
> > If a mount request is triggered by an access in the subordinate namespace
> > for a
> > dentry that is already mounted in the parent namespace it will silently fail
> > (in
> > that a mount won't appear in the subordinate namespace) rather than getting
> > an
> > ELOOP error as it would now.
> > 
> > It's also the case that, if such a mount isn't already mounted, it will
> > cause a
> > mount to occur in the parent namespace. But that is also the way it is
> > without
> > the change.
> > 
> > TBH I don't know yet how to resolve that, ideally the cloned mounts would
> > not
> > appear in the subordinate namespace upon creation but that's also not
> > currently
> > possible to do and even if it was it would mean quite a change in to the way
> > things behave now.
> > 
> > All in all I believe the change here solves a problem that needs to be
> > solved
> > without affecting normal usage at the expense of a small behaviour change to
> > cases where automount isn't providing a mounting service.
> 
> That sounds like a reasonable semantic change.  Limiting the responses
> of the autofs mount path to what is present in the mount namespace
> of the program that actually performs the autofs mounts seems needed.

Indeed, yes.

> 
> In fact the entire local mount concept exists because I was solving a
> very similar problem for rename, unlink and rmdir.  Where a cloned mount
> namespace could cause a denial of service attack on the original
> mount namespace.
> 
> I don't know if this change makes sense for mount expiry.

Originally I thought it did but now I think your right, it won't actually make a
difference.

Let me think a little more about it, I thought there was a reason I included the
expire in the changes but I can't remember now.

It may be that originally I thought individual automount(8) instances within
containers could be affected by an instance of automount(8) in the root
namespace (and visa versa) but now I think these will all be isolated.

My assumption being that people don't stupid things like pass an autofs mount to
a container and expect to also run a distinct automount(8) instance within the
same container.

> 
> Unless I am misreading something when a mount namespace is cloned the
> new mounts are put into the same expiry group as the old mounts.

autofs doesn't use the in kernel expiry but conceptually this is right.

> Furthermore the triggers for mounts are based on the filesystem.

Yes, that's also the case.

> 
> 
> I can think of 3 ways to use mount namespaces that are relevant
> to this discussion.
> 
> - Symmetric mount propagation where everything is identical except
>   for specific mounts such as /tmp.

I'm not sure this case is useful in practice, at least not currently, and there
is at least one case where systemd setting the root file system shared breaks
autofs.

> 
> - Slave mount propagation where all of the mounts are created in
>   the parent and propgated to the slave, except for specific exceptions.

This is currently the common case AFAIK.

Docker, for example, would pass --volume=/autofs/indirect/mount at startup.

There's no sensible way I'm aware of that autofs direct mounts can be used in
this way but that's different problem.

> 
> - Disabled mount propagation.  Where updates are simply not received
>   by the namespace.  The mount namespace is expected to change in
>   ways that are completely independent of the parent (and this breaks
>   autofs).

This is also a case I think is needed.

For example, running independent automount(8) instances within containers.

Running an instance of automount(8) in a container should behave like this
already.

> 
> In the first two cases the desire is to have the same set of mounts
> except for specific exceptions so it is generally desirable.  So having
> someone using a mount in another mount namespace seems like a good
> reason not to expire the mount.

Yes, that's something I have been thinking about.

This is essentially the way it is now and I don't see any reason to change it.

After all automounting is meant to conserve resources so keeping something
mounted that is being used somewhere makes sense.

> 
> Furthermore since the processes can always trigger or hang onto the
> mounts without using mount namespaces I don't think those cases add
> anything new to the set of problems.
> 
> It seems to me the real problem is when something is unmounted in the
> original mount namespace and not in the slaves which causes the mount
> calls to fail and cause all kinds of havoc.

It does, yes.

> 
> Unless you can see an error in my reasoning I think the local mount
> tests should be limited to just the mount path.  That is sufficient to
> keep autofs working as expected while still respecting non-problem users
> in other mount namespaces.

Right, as I said above give me a little time on that.

Ian
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Omar Sandoval Sept. 16, 2016, 9:14 p.m. UTC | #10
On Wed, Sep 14, 2016 at 02:14:45PM +0800, Ian Kent wrote:
> If an automount mount is clone(2)ed into a file system that is
> propagation private, when it later expires in the originating
> namespace subsequent calls to autofs ->d_automount() for that
> dentry in the original namespace will return ELOOP until the
> mount is manually umounted in the cloned namespace.
> 
> In the same way, if an autofs mount is triggered by automount(8)
> running within a container the dentry will be seen as mounted in
> the root init namespace and calls to ->d_automount() in that namespace
> will return ELOOP until the mount is umounted within the container.
> 
> Also, have_submounts() can return an incorect result when a mount
> exists in a namespace other than the one being checked.
> 
> Signed-off-by: Ian Kent <raven@themaw.net>
> Cc: Al Viro <viro@ZenIV.linux.org.uk>
> Cc: Eric W. Biederman <ebiederm@xmission.com>
> Cc: Omar Sandoval <osandov@osandov.com>

I can confirm that this fixes my repro and hasn't caused any other
problems as far as I can tell.

Tested-by: Omar Sandoval <osandov@fb.com>
Mateusz Guzik Sept. 17, 2016, 8:10 p.m. UTC | #11
On Wed, Sep 14, 2016 at 02:14:45PM +0800, Ian Kent wrote:
> If an automount mount is clone(2)ed into a file system that is
> propagation private, when it later expires in the originating
> namespace subsequent calls to autofs ->d_automount() for that
> dentry in the original namespace will return ELOOP until the
> mount is manually umounted in the cloned namespace.
> 
> In the same way, if an autofs mount is triggered by automount(8)
> running within a container the dentry will be seen as mounted in
> the root init namespace and calls to ->d_automount() in that namespace
> will return ELOOP until the mount is umounted within the container.
> 
> Also, have_submounts() can return an incorect result when a mount
> exists in a namespace other than the one being checked.
> 
> @@ -460,7 +460,7 @@ static int autofs4_d_manage(struct dentry *dentry, bool rcu_walk)
>  
>  		if (ino->flags & AUTOFS_INF_WANT_EXPIRE)
>  			return 0;
> -		if (d_mountpoint(dentry))
> +		if (is_local_mountpoint(dentry))
>  			return 0;
>  		inode = d_inode_rcu(dentry);
>  		if (inode && S_ISLNK(inode->i_mode))

This change is within RCU lookup.

is_local_mountpoint may end up calling __is_local_mountpoint, which will
optionally take the namespace_sem lock, resulting in a splat:

 #0:  (&(&sbi->fs_lock)->rlock){+.+...}, at: [<ffffffff816a4572>] autofs4_d_manage+0x202/0x290^M
Preemption disabled at:[<ffffffff816a4572>] autofs4_d_manage+0x202/0x290^M
^M
CPU: 1 PID: 1307 Comm: iknowthis Not tainted 4.8.0-rc6-next-20160916dupa #448^M
Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011^M
 ffff8800077378f8 ffffffff818eaffc 0000000000000001 0000000000000000^M
 ffff880007737930 ffffffff8110c870 ffff880007588048 ffffffff82483840^M
 0000000000000015 0000000000000000 ffff880007588040 ffff880007737978^M
Call Trace:^M
 [<ffffffff818eaffc>] dump_stack+0x85/0xc9^M
 [<ffffffff8110c870>] ___might_sleep+0x1e0/0x2e0^M
 [<ffffffff8110c9e1>] __might_sleep+0x71/0xe0^M
 [<ffffffff8110d039>] ? preempt_count_sub+0x39/0x80^M
 [<ffffffff8220352a>] down_read+0x2a/0xc0^M
 [<ffffffff813f7ec6>] __is_local_mountpoint+0x66/0xe0^M
 [<ffffffff816a45d3>] autofs4_d_manage+0x263/0x290^M
 [<ffffffff813d1a47>] follow_managed+0x157/0x480^M
 [<ffffffff813d6b5b>] lookup_fast+0x3ab/0x690^M
 [<ffffffff813d67b0>] ? trailing_symlink+0x370/0x370^M
 [<ffffffff813d7757>] ? path_init+0x917/0xa10^M
 [<ffffffff811525e7>] ? __mutex_init+0x77/0x80^M
 [<ffffffff813d910c>] path_openat+0x2bc/0x13e0^M
 [<ffffffff813d8e50>] ? path_lookupat+0x1f0/0x1f0^M
 [<ffffffff8137e48f>] ? __asan_loadN+0xf/0x20^M
 [<ffffffff81088776>] ? pvclock_clocksource_read+0xd6/0x180^M
 [<ffffffff810870d3>] ? kvm_clock_read+0x23/0x40^M
 [<ffffffff813dc3a2>] do_filp_open+0x122/0x1c0^M
 [<ffffffff8110d039>] ? preempt_count_sub+0x39/0x80^M
 [<ffffffff813dc280>] ? may_open_dev+0x50/0x50^M
 [<ffffffff8110cf88>] ? preempt_count_sub.part.67+0x18/0x90^M
 [<ffffffff8110d039>] ? preempt_count_sub+0x39/0x80^M
 [<ffffffff82207c31>] ? _raw_spin_unlock+0x31/0x50^M
 [<ffffffff813f6061>] ? __alloc_fd+0x141/0x2b0^M
 [<ffffffff813bd02c>] do_sys_open+0x17c/0x2c0^M
 [<ffffffff813bceb0>] ? filp_open+0x60/0x60^M
 [<ffffffff8100201a>] ? trace_hardirqs_on_thunk+0x1a/0x1c^M
 [<ffffffff813bd18e>] SyS_open+0x1e/0x20^M
 [<ffffffff82208701>] entry_SYSCALL_64_fastpath+0x1f/0xc2^M
 [<ffffffff811549e5>] ? trace_hardirqs_off_caller+0xc5/0x120^M

I don't know this code. Perhaps it will be perfectly fine performance wise to
just drop out of RCU lookup in this case.
Ian Kent Sept. 19, 2016, 12:58 a.m. UTC | #12
On Fri, 2016-09-16 at 10:58 +0800, Ian Kent wrote:
> On Thu, 2016-09-15 at 19:47 -0500, Eric W. Biederman wrote:
> > Ian Kent <raven@themaw.net> writes:
> > 
> > > On Wed, 2016-09-14 at 21:08 -0500, Eric W. Biederman wrote:
> > > > Ian Kent <raven@themaw.net> writes:
> > > > 
> > > > > On Wed, 2016-09-14 at 12:28 -0500, Eric W. Biederman wrote:
> > > > > > Ian Kent <raven@themaw.net> writes:
> > > > > > 
> > > > > > > If an automount mount is clone(2)ed into a file system that is
> > > > > > > propagation private, when it later expires in the originating
> > > > > > > namespace subsequent calls to autofs ->d_automount() for that
> > > > > > > dentry in the original namespace will return ELOOP until the
> > > > > > > mount is manually umounted in the cloned namespace.
> > > > > > > 
> > > > > > > In the same way, if an autofs mount is triggered by automount(8)
> > > > > > > running within a container the dentry will be seen as mounted in
> > > > > > > the root init namespace and calls to ->d_automount() in that
> > > > > > > namespace
> > > > > > > will return ELOOP until the mount is umounted within the
> > > > > > > container.
> > > > > > > 
> > > > > > > Also, have_submounts() can return an incorect result when a mount
> > > > > > > exists in a namespace other than the one being checked.
> > > > > > 
> > > > > > Overall this appears to be a fairly reasonable set of changes.  It
> > > > > > does
> > > > > > increase the expense when an actual mount point is encountered, but
> > > > > > if
> > > > > > these are the desired some increase in cost when a dentry is a
> > > > > > mountpoint is unavoidable.
> > > > > > 
> > > > > > May I ask the motiviation for this set of changes?  Reading through
> > > > > > the
> > > > > > changes I don't grasp why we want to change the behavior of autofs.
> > > > > > What problem is being solved?  What are the benefits?
> > > > > 
> > > > > LOL, it's all too easy for me to give a patch description that I think
> > > > > explains
> > > > > a problem I need to solve without realizing it isn't clear to others
> > > > > what
> > > > > the
> > > > > problem is, sorry about that.
> > > > > 
> > > > > For quite a while now, and not that frequently but consistently, I've
> > > > > been
> > > > > getting reports of people using autofs getting ELOOP errors and not
> > > > > being
> > > > > able
> > > > > to mount automounts.
> > > > > 
> > > > > This has been due to the cloning of autofs file systems (that have
> > > > > active
> > > > > automounts at the time of the clone) by other systems.
> > > > > 
> > > > > An unshare, as one example, can easily result in the cloning of an
> > > > > autofs
> > > > > file
> > > > > system that has active mounts which shows this problem.
> > > > > 
> > > > > Once an active mount that has been cloned is expired in the namespace
> > > > > that
> > > > > performed the unshare it can't be (auto)mounted again in the the
> > > > > originating
> > > > > namespace because the mounted check in the autofs module will think it
> > > > > is
> > > > > already mounted.
> > > > > 
> > > > > I'm not sure this is a clear description either, hopefully it is
> > > > > enough
> > > > > to
> > > > > demonstrate the type of problem I'm typing to solve.
> > > > 
> > > > So to rephrase the problem is that an autofs instance can stop working
> > > > properly from the perspective of the mount namespace it is mounted in
> > > > if the autofs instance is shared between multiple mount namespaces.  The
> > > > problem is that mounts and unmounts do not always propogate between
> > > > mount namespaces.  This lack of symmetric mount/unmount behavior
> > > > leads to mountpoints that become unusable.
> > > 
> > > That's right.
> > > 
> > > It's also worth considering that symmetric mount propagation is usually
> > > not
> > > the
> > > behaviour needed either and things like LXC and Docker are set propagation
> > > slave
> > > because of problems caused by propagation back to the parent namespace.
> > > 
> > > So a mount can be triggered within a container, mounted by the automount
> > > daemon
> > > in the parent namespace, and propagated to the child and similarly for
> > > expires,
> > > which is the common use case now.
> > > 
> > > > 
> > > > Which leads to the question what is the expected new behavior with your
> > > > patchset applied.  New mounts can be added in the parent mount namespace
> > > > (because the test is local).  Does your change also allow the
> > > > autofs mountpoints to be used in the other mount namespaces that share
> > > > the autofs instance if everything becomes unmounted?
> > > 
> > > The problem occurs when the subordinate namespace doesn't deal with these
> > > propagated mounts properly, although they can obviously be used by the
> > > subordinate namespace.
> > > 
> > > > 
> > > > Or is it expected that other mount namespaces that share an autofs
> > > > instance will get changes in their mounts via mount propagation and if
> > > > mount propagation is insufficient they are on their own.
> > > 
> > > Namespaces that receive updates via mount propagation from a parent will
> > > continue to function as they do now.
> > > 
> > > Mounts that don't get updates via mount propagation will retain the mount
> > > to
> > > use
> > > if they need to, as they would without this change, but the originating
> > > namespace will also continue to function as expected.
> > > 
> > > The child namespace needs cleanup its mounts on exit, which it had to do
> > > prior
> > > to this change also.
> > > 
> > > > 
> > > > I believe this is a question of how do notifications of the desire for
> > > > an automount work after your change, and are those notifications
> > > > consistent with your desired and/or expected behavior.
> > > 
> > > It sounds like you might be assuming the service receiving these cloned
> > > mounts
> > > actually wants to use them or is expecting them to behave like automount
> > > mounts.
> > > But that's not what I've seen and is not the way these cloned mounts
> > > behave
> > > without the change.
> > > 
> > > However, as has probably occurred to you by now, there is a semantic
> > > change
> > > with
> > > this for namespaces that don't receive mount propogation.
> > > 
> > > If a mount request is triggered by an access in the subordinate namespace
> > > for a
> > > dentry that is already mounted in the parent namespace it will silently
> > > fail
> > > (in
> > > that a mount won't appear in the subordinate namespace) rather than
> > > getting
> > > an
> > > ELOOP error as it would now.
> > > 
> > > It's also the case that, if such a mount isn't already mounted, it will
> > > cause a
> > > mount to occur in the parent namespace. But that is also the way it is
> > > without
> > > the change.
> > > 
> > > TBH I don't know yet how to resolve that, ideally the cloned mounts would
> > > not
> > > appear in the subordinate namespace upon creation but that's also not
> > > currently
> > > possible to do and even if it was it would mean quite a change in to the
> > > way
> > > things behave now.
> > > 
> > > All in all I believe the change here solves a problem that needs to be
> > > solved
> > > without affecting normal usage at the expense of a small behaviour change
> > > to
> > > cases where automount isn't providing a mounting service.
> > 
> > That sounds like a reasonable semantic change.  Limiting the responses
> > of the autofs mount path to what is present in the mount namespace
> > of the program that actually performs the autofs mounts seems needed.
> 
> Indeed, yes.
> 
> > 
> > In fact the entire local mount concept exists because I was solving a
> > very similar problem for rename, unlink and rmdir.  Where a cloned mount
> > namespace could cause a denial of service attack on the original
> > mount namespace.
> > 
> > I don't know if this change makes sense for mount expiry.
> 
> Originally I thought it did but now I think your right, it won't actually make
> a
> difference.
> 
> Let me think a little more about it, I thought there was a reason I included
> the
> expire in the changes but I can't remember now.
> 
> It may be that originally I thought individual automount(8) instances within
> containers could be affected by an instance of automount(8) in the root
> namespace (and visa versa) but now I think these will all be isolated.

I also thought that the autofs expire would continue to see the umounted mount
and continue calling back to the daemon in an attempt to umount it.

That isn't the case so I can drop the changes to the expire expire code as you
recommend.

Ian
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ian Kent Sept. 19, 2016, 1:36 a.m. UTC | #13
On Sat, 2016-09-17 at 22:10 +0200, Mateusz Guzik wrote:
> On Wed, Sep 14, 2016 at 02:14:45PM +0800, Ian Kent wrote:
> > If an automount mount is clone(2)ed into a file system that is
> > propagation private, when it later expires in the originating
> > namespace subsequent calls to autofs ->d_automount() for that
> > dentry in the original namespace will return ELOOP until the
> > mount is manually umounted in the cloned namespace.
> > 
> > In the same way, if an autofs mount is triggered by automount(8)
> > running within a container the dentry will be seen as mounted in
> > the root init namespace and calls to ->d_automount() in that namespace
> > will return ELOOP until the mount is umounted within the container.
> > 
> > Also, have_submounts() can return an incorect result when a mount
> > exists in a namespace other than the one being checked.
> > 
> > @@ -460,7 +460,7 @@ static int autofs4_d_manage(struct dentry *dentry, bool
> > rcu_walk)
> >  
> >  		if (ino->flags & AUTOFS_INF_WANT_EXPIRE)
> >  			return 0;
> > -		if (d_mountpoint(dentry))
> > +		if (is_local_mountpoint(dentry))
> >  			return 0;
> >  		inode = d_inode_rcu(dentry);
> >  		if (inode && S_ISLNK(inode->i_mode))
> 
> This change is within RCU lookup.
> 
> is_local_mountpoint may end up calling __is_local_mountpoint, which will
> optionally take the namespace_sem lock, resulting in a splat:

Yes, that's a serious problem I missed.

snip ...

> I don't know this code. Perhaps it will be perfectly fine performance wise to
> just drop out of RCU lookup in this case.

It's a bit worse than that.

I think being able to continue the rcu-walk for an already mounted dentry that
is not being expired is an important part of the performance improvement given
by the series that added this.

Can you confirm that Neil?

But for the case here the existing test can allow rcu-walk to continue for a
dentry that would attempt to trigger an automount so it's also a bug in the
existing code.

Any thoughts on how we can handle this Neil, I'm having a bit of trouble working
out how to resolve this one.

Ian
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric W. Biederman Sept. 20, 2016, 4:09 p.m. UTC | #14
Ian Kent <raven@themaw.net> writes:

> On Fri, 2016-09-16 at 10:58 +0800, Ian Kent wrote:
>> On Thu, 2016-09-15 at 19:47 -0500, Eric W. Biederman wrote:
>> > Ian Kent <raven@themaw.net> writes:
>> > 
>> > > On Wed, 2016-09-14 at 21:08 -0500, Eric W. Biederman wrote:
>> > > > Ian Kent <raven@themaw.net> writes:
>> > > > 
>> > > > > On Wed, 2016-09-14 at 12:28 -0500, Eric W. Biederman wrote:
>> > > > > > Ian Kent <raven@themaw.net> writes:
>> > > > > > 
>> > > > > > > If an automount mount is clone(2)ed into a file system that is
>> > > > > > > propagation private, when it later expires in the originating
>> > > > > > > namespace subsequent calls to autofs ->d_automount() for that
>> > > > > > > dentry in the original namespace will return ELOOP until the
>> > > > > > > mount is manually umounted in the cloned namespace.
>> > > > > > > 
>> > > > > > > In the same way, if an autofs mount is triggered by automount(8)
>> > > > > > > running within a container the dentry will be seen as mounted in
>> > > > > > > the root init namespace and calls to ->d_automount() in that
>> > > > > > > namespace
>> > > > > > > will return ELOOP until the mount is umounted within the
>> > > > > > > container.
>> > > > > > > 
>> > > > > > > Also, have_submounts() can return an incorect result when a mount
>> > > > > > > exists in a namespace other than the one being checked.
>> > > > > > 
>> > > > > > Overall this appears to be a fairly reasonable set of changes.  It
>> > > > > > does
>> > > > > > increase the expense when an actual mount point is encountered, but
>> > > > > > if
>> > > > > > these are the desired some increase in cost when a dentry is a
>> > > > > > mountpoint is unavoidable.
>> > > > > > 
>> > > > > > May I ask the motiviation for this set of changes?  Reading through
>> > > > > > the
>> > > > > > changes I don't grasp why we want to change the behavior of autofs.
>> > > > > > What problem is being solved?  What are the benefits?
>> > > > > 
>> > > > > LOL, it's all too easy for me to give a patch description that I think
>> > > > > explains
>> > > > > a problem I need to solve without realizing it isn't clear to others
>> > > > > what
>> > > > > the
>> > > > > problem is, sorry about that.
>> > > > > 
>> > > > > For quite a while now, and not that frequently but consistently, I've
>> > > > > been
>> > > > > getting reports of people using autofs getting ELOOP errors and not
>> > > > > being
>> > > > > able
>> > > > > to mount automounts.
>> > > > > 
>> > > > > This has been due to the cloning of autofs file systems (that have
>> > > > > active
>> > > > > automounts at the time of the clone) by other systems.
>> > > > > 
>> > > > > An unshare, as one example, can easily result in the cloning of an
>> > > > > autofs
>> > > > > file
>> > > > > system that has active mounts which shows this problem.
>> > > > > 
>> > > > > Once an active mount that has been cloned is expired in the namespace
>> > > > > that
>> > > > > performed the unshare it can't be (auto)mounted again in the the
>> > > > > originating
>> > > > > namespace because the mounted check in the autofs module will think it
>> > > > > is
>> > > > > already mounted.
>> > > > > 
>> > > > > I'm not sure this is a clear description either, hopefully it is
>> > > > > enough
>> > > > > to
>> > > > > demonstrate the type of problem I'm typing to solve.
>> > > > 
>> > > > So to rephrase the problem is that an autofs instance can stop working
>> > > > properly from the perspective of the mount namespace it is mounted in
>> > > > if the autofs instance is shared between multiple mount namespaces.  The
>> > > > problem is that mounts and unmounts do not always propogate between
>> > > > mount namespaces.  This lack of symmetric mount/unmount behavior
>> > > > leads to mountpoints that become unusable.
>> > > 
>> > > That's right.
>> > > 
>> > > It's also worth considering that symmetric mount propagation is usually
>> > > not
>> > > the
>> > > behaviour needed either and things like LXC and Docker are set propagation
>> > > slave
>> > > because of problems caused by propagation back to the parent namespace.
>> > > 
>> > > So a mount can be triggered within a container, mounted by the automount
>> > > daemon
>> > > in the parent namespace, and propagated to the child and similarly for
>> > > expires,
>> > > which is the common use case now.
>> > > 
>> > > > 
>> > > > Which leads to the question what is the expected new behavior with your
>> > > > patchset applied.  New mounts can be added in the parent mount namespace
>> > > > (because the test is local).  Does your change also allow the
>> > > > autofs mountpoints to be used in the other mount namespaces that share
>> > > > the autofs instance if everything becomes unmounted?
>> > > 
>> > > The problem occurs when the subordinate namespace doesn't deal with these
>> > > propagated mounts properly, although they can obviously be used by the
>> > > subordinate namespace.
>> > > 
>> > > > 
>> > > > Or is it expected that other mount namespaces that share an autofs
>> > > > instance will get changes in their mounts via mount propagation and if
>> > > > mount propagation is insufficient they are on their own.
>> > > 
>> > > Namespaces that receive updates via mount propagation from a parent will
>> > > continue to function as they do now.
>> > > 
>> > > Mounts that don't get updates via mount propagation will retain the mount
>> > > to
>> > > use
>> > > if they need to, as they would without this change, but the originating
>> > > namespace will also continue to function as expected.
>> > > 
>> > > The child namespace needs cleanup its mounts on exit, which it had to do
>> > > prior
>> > > to this change also.
>> > > 
>> > > > 
>> > > > I believe this is a question of how do notifications of the desire for
>> > > > an automount work after your change, and are those notifications
>> > > > consistent with your desired and/or expected behavior.
>> > > 
>> > > It sounds like you might be assuming the service receiving these cloned
>> > > mounts
>> > > actually wants to use them or is expecting them to behave like automount
>> > > mounts.
>> > > But that's not what I've seen and is not the way these cloned mounts
>> > > behave
>> > > without the change.
>> > > 
>> > > However, as has probably occurred to you by now, there is a semantic
>> > > change
>> > > with
>> > > this for namespaces that don't receive mount propogation.
>> > > 
>> > > If a mount request is triggered by an access in the subordinate namespace
>> > > for a
>> > > dentry that is already mounted in the parent namespace it will silently
>> > > fail
>> > > (in
>> > > that a mount won't appear in the subordinate namespace) rather than
>> > > getting
>> > > an
>> > > ELOOP error as it would now.
>> > > 
>> > > It's also the case that, if such a mount isn't already mounted, it will
>> > > cause a
>> > > mount to occur in the parent namespace. But that is also the way it is
>> > > without
>> > > the change.
>> > > 
>> > > TBH I don't know yet how to resolve that, ideally the cloned mounts would
>> > > not
>> > > appear in the subordinate namespace upon creation but that's also not
>> > > currently
>> > > possible to do and even if it was it would mean quite a change in to the
>> > > way
>> > > things behave now.
>> > > 
>> > > All in all I believe the change here solves a problem that needs to be
>> > > solved
>> > > without affecting normal usage at the expense of a small behaviour change
>> > > to
>> > > cases where automount isn't providing a mounting service.
>> > 
>> > That sounds like a reasonable semantic change.  Limiting the responses
>> > of the autofs mount path to what is present in the mount namespace
>> > of the program that actually performs the autofs mounts seems needed.
>> 
>> Indeed, yes.
>> 
>> > 
>> > In fact the entire local mount concept exists because I was solving a
>> > very similar problem for rename, unlink and rmdir.  Where a cloned mount
>> > namespace could cause a denial of service attack on the original
>> > mount namespace.
>> > 
>> > I don't know if this change makes sense for mount expiry.
>> 
>> Originally I thought it did but now I think your right, it won't actually make
>> a
>> difference.
>> 
>> Let me think a little more about it, I thought there was a reason I included
>> the
>> expire in the changes but I can't remember now.
>> 
>> It may be that originally I thought individual automount(8) instances within
>> containers could be affected by an instance of automount(8) in the root
>> namespace (and visa versa) but now I think these will all be isolated.
>
> I also thought that the autofs expire would continue to see the umounted mount
> and continue calling back to the daemon in an attempt to umount it.
>
> That isn't the case so I can drop the changes to the expire expire code as you
> recommend.

Sounds good.

Eric

--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric W. Biederman Sept. 20, 2016, 4:50 p.m. UTC | #15
Ian Kent <raven@themaw.net> writes:

> On Sat, 2016-09-17 at 22:10 +0200, Mateusz Guzik wrote:
>> On Wed, Sep 14, 2016 at 02:14:45PM +0800, Ian Kent wrote:
>> > If an automount mount is clone(2)ed into a file system that is
>> > propagation private, when it later expires in the originating
>> > namespace subsequent calls to autofs ->d_automount() for that
>> > dentry in the original namespace will return ELOOP until the
>> > mount is manually umounted in the cloned namespace.
>> > 
>> > In the same way, if an autofs mount is triggered by automount(8)
>> > running within a container the dentry will be seen as mounted in
>> > the root init namespace and calls to ->d_automount() in that namespace
>> > will return ELOOP until the mount is umounted within the container.
>> > 
>> > Also, have_submounts() can return an incorect result when a mount
>> > exists in a namespace other than the one being checked.
>> > 
>> > @@ -460,7 +460,7 @@ static int autofs4_d_manage(struct dentry *dentry, bool
>> > rcu_walk)
>> >  
>> >  		if (ino->flags & AUTOFS_INF_WANT_EXPIRE)
>> >  			return 0;
>> > -		if (d_mountpoint(dentry))
>> > +		if (is_local_mountpoint(dentry))
>> >  			return 0;
>> >  		inode = d_inode_rcu(dentry);
>> >  		if (inode && S_ISLNK(inode->i_mode))
>> 
>> This change is within RCU lookup.
>> 
>> is_local_mountpoint may end up calling __is_local_mountpoint, which will
>> optionally take the namespace_sem lock, resulting in a splat:
>
> Yes, that's a serious problem I missed.
>
> snip ...
>
>> I don't know this code. Perhaps it will be perfectly fine performance wise to
>> just drop out of RCU lookup in this case.
>
> It's a bit worse than that.
>
> I think being able to continue the rcu-walk for an already mounted dentry that
> is not being expired is an important part of the performance improvement given
> by the series that added this.
>
> Can you confirm that Neil?
>
> But for the case here the existing test can allow rcu-walk to continue for a
> dentry that would attempt to trigger an automount so it's also a bug in the
> existing code.

I don't think the existing code is buggy.  As I read __follow_mount_rcu
if DCACHE_NEED_AUTOMOUNT is set on the dentry after return from d_manage
the code will break out of the rcu walk.

> Any thoughts on how we can handle this Neil, I'm having a bit of trouble working
> out how to resolve this one.

I believe in this case d_mountpoint is enough for the rcu walk case.  If
the mountpoint turns out not to be local __follow_mount_rcu will kick
out of the rcu walk as it will return false.  Because:
	return !mounted && !(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT);

I am not quite certain about the non-rcu case.  That can't be
is_local_mountpoint as that is inside a spinlock and is_local_mountpoint
can sleep.  Perhaps d_mountpoint is simply correct for autofs4_d_manage.

Eric

--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ian Kent Sept. 20, 2016, 10:44 p.m. UTC | #16
On Tue, 2016-09-20 at 11:50 -0500, Eric W. Biederman wrote:
> Ian Kent <raven@themaw.net> writes:
> 
> > On Sat, 2016-09-17 at 22:10 +0200, Mateusz Guzik wrote:
> > > On Wed, Sep 14, 2016 at 02:14:45PM +0800, Ian Kent wrote:
> > > > If an automount mount is clone(2)ed into a file system that is
> > > > propagation private, when it later expires in the originating
> > > > namespace subsequent calls to autofs ->d_automount() for that
> > > > dentry in the original namespace will return ELOOP until the
> > > > mount is manually umounted in the cloned namespace.
> > > > 
> > > > In the same way, if an autofs mount is triggered by automount(8)
> > > > running within a container the dentry will be seen as mounted in
> > > > the root init namespace and calls to ->d_automount() in that namespace
> > > > will return ELOOP until the mount is umounted within the container.
> > > > 
> > > > Also, have_submounts() can return an incorect result when a mount
> > > > exists in a namespace other than the one being checked.
> > > > 
> > > > @@ -460,7 +460,7 @@ static int autofs4_d_manage(struct dentry *dentry,
> > > > bool
> > > > rcu_walk)
> > > >  
> > > >  		if (ino->flags & AUTOFS_INF_WANT_EXPIRE)
> > > >  			return 0;
> > > > -		if (d_mountpoint(dentry))
> > > > +		if (is_local_mountpoint(dentry))
> > > >  			return 0;
> > > >  		inode = d_inode_rcu(dentry);
> > > >  		if (inode && S_ISLNK(inode->i_mode))
> > > 
> > > This change is within RCU lookup.
> > > 
> > > is_local_mountpoint may end up calling __is_local_mountpoint, which will
> > > optionally take the namespace_sem lock, resulting in a splat:
> > 
> > Yes, that's a serious problem I missed.
> > 
> > snip ...
> > 
> > > I don't know this code. Perhaps it will be perfectly fine performance wise
> > > to
> > > just drop out of RCU lookup in this case.
> > 
> > It's a bit worse than that.
> > 
> > I think being able to continue the rcu-walk for an already mounted dentry
> > that
> > is not being expired is an important part of the performance improvement
> > given
> > by the series that added this.
> > 
> > Can you confirm that Neil?
> > 
> > But for the case here the existing test can allow rcu-walk to continue for a
> > dentry that would attempt to trigger an automount so it's also a bug in the
> > existing code.
> 
> I don't think the existing code is buggy.  As I read __follow_mount_rcu
> if DCACHE_NEED_AUTOMOUNT is set on the dentry after return from d_manage
> the code will break out of the rcu walk.

That's right.

I'm working on follow up patches now.

> 
> > Any thoughts on how we can handle this Neil, I'm having a bit of trouble
> > working
> > out how to resolve this one.
> 
> I believe in this case d_mountpoint is enough for the rcu walk case.  If
> the mountpoint turns out not to be local __follow_mount_rcu will kick
> out of the rcu walk as it will return false.  Because:
> 	return !mounted && !(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT);
> 
> I am not quite certain about the non-rcu case.  That can't be
> is_local_mountpoint as that is inside a spinlock and is_local_mountpoint
> can sleep.  Perhaps d_mountpoint is simply correct for autofs4_d_manage.

That's right, I think I have other mistakes there too.

I'm checking those too.

> 
> Eric
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ian Kent Sept. 20, 2016, 11 p.m. UTC | #17
On Wed, 2016-09-21 at 06:44 +0800, Ian Kent wrote:
> On Tue, 2016-09-20 at 11:50 -0500, Eric W. Biederman wrote:
> > Ian Kent <raven@themaw.net> writes:
> > 
> > > On Sat, 2016-09-17 at 22:10 +0200, Mateusz Guzik wrote:
> > > > On Wed, Sep 14, 2016 at 02:14:45PM +0800, Ian Kent wrote:
> > > > > If an automount mount is clone(2)ed into a file system that is
> > > > > propagation private, when it later expires in the originating
> > > > > namespace subsequent calls to autofs ->d_automount() for that
> > > > > dentry in the original namespace will return ELOOP until the
> > > > > mount is manually umounted in the cloned namespace.
> > > > > 
> > > > > In the same way, if an autofs mount is triggered by automount(8)
> > > > > running within a container the dentry will be seen as mounted in
> > > > > the root init namespace and calls to ->d_automount() in that namespace
> > > > > will return ELOOP until the mount is umounted within the container.
> > > > > 
> > > > > Also, have_submounts() can return an incorect result when a mount
> > > > > exists in a namespace other than the one being checked.
> > > > > 
> > > > > @@ -460,7 +460,7 @@ static int autofs4_d_manage(struct dentry *dentry,
> > > > > bool
> > > > > rcu_walk)
> > > > >  
> > > > >  		if (ino->flags & AUTOFS_INF_WANT_EXPIRE)
> > > > >  			return 0;
> > > > > -		if (d_mountpoint(dentry))
> > > > > +		if (is_local_mountpoint(dentry))
> > > > >  			return 0;
> > > > >  		inode = d_inode_rcu(dentry);
> > > > >  		if (inode && S_ISLNK(inode->i_mode))
> > > > 
> > > > This change is within RCU lookup.
> > > > 
> > > > is_local_mountpoint may end up calling __is_local_mountpoint, which will
> > > > optionally take the namespace_sem lock, resulting in a splat:
> > > 
> > > Yes, that's a serious problem I missed.
> > > 
> > > snip ...
> > > 
> > > > I don't know this code. Perhaps it will be perfectly fine performance
> > > > wise
> > > > to
> > > > just drop out of RCU lookup in this case.
> > > 
> > > It's a bit worse than that.
> > > 
> > > I think being able to continue the rcu-walk for an already mounted dentry
> > > that
> > > is not being expired is an important part of the performance improvement
> > > given
> > > by the series that added this.
> > > 
> > > Can you confirm that Neil?
> > > 
> > > But for the case here the existing test can allow rcu-walk to continue for
> > > a
> > > dentry that would attempt to trigger an automount so it's also a bug in
> > > the
> > > existing code.
> > 
> > I don't think the existing code is buggy.  As I read __follow_mount_rcu
> > if DCACHE_NEED_AUTOMOUNT is set on the dentry after return from d_manage
> > the code will break out of the rcu walk.
> 
> That's right.
> 
> I'm working on follow up patches now.
> 
> > 
> > > Any thoughts on how we can handle this Neil, I'm having a bit of trouble
> > > working
> > > out how to resolve this one.
> > 
> > I believe in this case d_mountpoint is enough for the rcu walk case.  If
> > the mountpoint turns out not to be local __follow_mount_rcu will kick
> > out of the rcu walk as it will return false.  Because:
> > 	return !mounted && !(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT);
> > 
> > I am not quite certain about the non-rcu case.  That can't be
> > is_local_mountpoint as that is inside a spinlock and is_local_mountpoint
> > can sleep.  Perhaps d_mountpoint is simply correct for autofs4_d_manage.
> 
> That's right, I think I have other mistakes there too.
> 
> I'm checking those too.

It may be that is_local_mountpoint() isn't the right thing to use for this.

When I originally set out to do this I used a lookup_mnt() type check which
should be sufficient in this case so I might need to go back to that.

> 
> > 
> > Eric
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ian Kent Sept. 22, 2016, 1:33 a.m. UTC | #18
On Wed, 2016-09-21 at 07:00 +0800, Ian Kent wrote:
> On Wed, 2016-09-21 at 06:44 +0800, Ian Kent wrote:
> > On Tue, 2016-09-20 at 11:50 -0500, Eric W. Biederman wrote:
> > > Ian Kent <raven@themaw.net> writes:
> > > 
> > > > On Sat, 2016-09-17 at 22:10 +0200, Mateusz Guzik wrote:
> > > > > On Wed, Sep 14, 2016 at 02:14:45PM +0800, Ian Kent wrote:
> > > > > > If an automount mount is clone(2)ed into a file system that is
> > > > > > propagation private, when it later expires in the originating
> > > > > > namespace subsequent calls to autofs ->d_automount() for that
> > > > > > dentry in the original namespace will return ELOOP until the
> > > > > > mount is manually umounted in the cloned namespace.
> > > > > > 
> > > > > > In the same way, if an autofs mount is triggered by automount(8)
> > > > > > running within a container the dentry will be seen as mounted in
> > > > > > the root init namespace and calls to ->d_automount() in that
> > > > > > namespace
> > > > > > will return ELOOP until the mount is umounted within the container.
> > > > > > 
> > > > > > Also, have_submounts() can return an incorect result when a mount
> > > > > > exists in a namespace other than the one being checked.
> > > > > > 
> > > > > > @@ -460,7 +460,7 @@ static int autofs4_d_manage(struct dentry
> > > > > > *dentry,
> > > > > > bool
> > > > > > rcu_walk)
> > > > > >  
> > > > > >  		if (ino->flags & AUTOFS_INF_WANT_EXPIRE)
> > > > > >  			return 0;
> > > > > > -		if (d_mountpoint(dentry))
> > > > > > +		if (is_local_mountpoint(dentry))
> > > > > >  			return 0;
> > > > > >  		inode = d_inode_rcu(dentry);
> > > > > >  		if (inode && S_ISLNK(inode->i_mode))
> > > > > 
> > > > > This change is within RCU lookup.
> > > > > 
> > > > > is_local_mountpoint may end up calling __is_local_mountpoint, which
> > > > > will
> > > > > optionally take the namespace_sem lock, resulting in a splat:
> > > > 
> > > > Yes, that's a serious problem I missed.
> > > > 
> > > > snip ...
> > > > 
> > > > > I don't know this code. Perhaps it will be perfectly fine performance
> > > > > wise
> > > > > to
> > > > > just drop out of RCU lookup in this case.
> > > > 
> > > > It's a bit worse than that.
> > > > 
> > > > I think being able to continue the rcu-walk for an already mounted
> > > > dentry
> > > > that
> > > > is not being expired is an important part of the performance improvement
> > > > given
> > > > by the series that added this.
> > > > 
> > > > Can you confirm that Neil?
> > > > 
> > > > But for the case here the existing test can allow rcu-walk to continue
> > > > for
> > > > a
> > > > dentry that would attempt to trigger an automount so it's also a bug in
> > > > the
> > > > existing code.
> > > 
> > > I don't think the existing code is buggy.  As I read __follow_mount_rcu
> > > if DCACHE_NEED_AUTOMOUNT is set on the dentry after return from d_manage
> > > the code will break out of the rcu walk.
> > 
> > That's right.
> > 
> > I'm working on follow up patches now.
> > 
> > > 
> > > > Any thoughts on how we can handle this Neil, I'm having a bit of trouble
> > > > working
> > > > out how to resolve this one.
> > > 
> > > I believe in this case d_mountpoint is enough for the rcu walk case.  If
> > > the mountpoint turns out not to be local __follow_mount_rcu will kick
> > > out of the rcu walk as it will return false.  Because:
> > > 	return !mounted && !(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT);
> > > 
> > > I am not quite certain about the non-rcu case.  That can't be
> > > is_local_mountpoint as that is inside a spinlock and is_local_mountpoint
> > > can sleep.  Perhaps d_mountpoint is simply correct for autofs4_d_manage.
> > 
> > That's right, I think I have other mistakes there too.
> > 
> > I'm checking those too.
> 
> It may be that is_local_mountpoint() isn't the right thing to use for this.
> 
> When I originally set out to do this I used a lookup_mnt() type check which
> should be sufficient in this case so I might need to go back to that.

Eric, Mateusz, I appreciate your spending time on this and particularly pointing
out my embarrassingly stupid is_local_mountpoint() usage mistake.

Please accept my apology for the inconvenience.

If all goes well (in testing) I'll have follow up patches to correct this fairly
soon.

Ian
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric W. Biederman Sept. 22, 2016, 3:43 p.m. UTC | #19
Ian Kent <raven@themaw.net> writes:

> Eric, Mateusz, I appreciate your spending time on this and particularly pointing
> out my embarrassingly stupid is_local_mountpoint() usage mistake.
>
> Please accept my apology for the inconvenience.
>
> If all goes well (in testing) I'll have follow up patches to correct this fairly
> soon.

Related question.  Do you happen to know how many mounts per mount
namespace tend to be used?  It looks like it is going to be wise to put
a configurable limit on that number.  And I would like the default to be
something high enough most people don't care.  I believe autofs is
likely where people tend to use the most mounts.

Eric
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ian Kent Sept. 23, 2016, 12:55 a.m. UTC | #20
On Thu, 2016-09-22 at 10:43 -0500, Eric W. Biederman wrote:
> Ian Kent <raven@themaw.net> writes:
> 
> > Eric, Mateusz, I appreciate your spending time on this and particularly
> > pointing
> > out my embarrassingly stupid is_local_mountpoint() usage mistake.
> > 
> > Please accept my apology for the inconvenience.
> > 
> > If all goes well (in testing) I'll have follow up patches to correct this
> > fairly
> > soon.
> 
> Related question.  Do you happen to know how many mounts per mount
> namespace tend to be used?  It looks like it is going to be wise to put
> a configurable limit on that number.  And I would like the default to be
> something high enough most people don't care.  I believe autofs is
> likely where people tend to use the most mounts.

That's a good question.

I've been thinking that maybe I should have used a lookup_mnt() type check as I
originally started out to, for this reason, as the mnt_namespace list looks to
be a linear list.

But there can be a lot of mounts, and not only due to autofs, so maybe that
should be considered anyway.

The number of mounts for direct mount maps is usually not very large because of
the way they are implemented, large direct mount maps can have performance
problems. There can be anywhere from a few (likely case a few hundred) to less
than 10000, plus mounts that have been triggered and not yet expired.

Indirect mounts have one autofs mount at the root plus the number of mounts that
have been triggered and not yet expired.

The number of autofs indirect map entries can range from a few to the common
case of several thousand and in rare cases up to between 30000 and 50000. I've
not heard of people with maps larger than 50000 entries.

The larger the number of map entries the greater the possibility for a large
number of active mounts so it's not hard to expect cases of a 1000 or somewhat
more active mounts.

Ian
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric W. Biederman Sept. 23, 2016, 1:37 a.m. UTC | #21
Ian Kent <raven@themaw.net> writes:

> On Thu, 2016-09-22 at 10:43 -0500, Eric W. Biederman wrote:
>> Ian Kent <raven@themaw.net> writes:
>> 
>> > Eric, Mateusz, I appreciate your spending time on this and particularly
>> > pointing
>> > out my embarrassingly stupid is_local_mountpoint() usage mistake.
>> > 
>> > Please accept my apology for the inconvenience.
>> > 
>> > If all goes well (in testing) I'll have follow up patches to correct this
>> > fairly
>> > soon.
>> 
>> Related question.  Do you happen to know how many mounts per mount
>> namespace tend to be used?  It looks like it is going to be wise to put
>> a configurable limit on that number.  And I would like the default to be
>> something high enough most people don't care.  I believe autofs is
>> likely where people tend to use the most mounts.
>
> That's a good question.
>
> I've been thinking that maybe I should have used a lookup_mnt() type check as I
> originally started out to, for this reason, as the mnt_namespace list looks to
> be a linear list.
>
> But there can be a lot of mounts, and not only due to autofs, so maybe that
> should be considered anyway.

There are two reasons for is_local_mountpoint being the way it is.

a) For the cases where you don't have the parent mount.
b) For the cases where you want to stop things if something is mounted
   on a dentry in the local mount namespace even if it isn't mounted
   on that dentry at your current mount parent. (Something that was
   important to not change the semantics of the single mount namespace case).

Both of those cases to apply to unlink, rmdir, and rename.  I don't think
either of those cases apply to what autofs is trying to do.  Certainly
not the second.

So if you have the parent mount I really think you want to be using some
variation of lookup_mnt().  The fact it is rcu safe may help with some
of those weird corner cases as well.

> The number of mounts for direct mount maps is usually not very large because of
> the way they are implemented, large direct mount maps can have performance
> problems. There can be anywhere from a few (likely case a few hundred) to less
> than 10000, plus mounts that have been triggered and not yet expired.
>
> Indirect mounts have one autofs mount at the root plus the number of mounts that
> have been triggered and not yet expired.
>
> The number of autofs indirect map entries can range from a few to the common
> case of several thousand and in rare cases up to between 30000 and 50000. I've
> not heard of people with maps larger than 50000 entries.
>
> The larger the number of map entries the greater the possibility for a large
> number of active mounts so it's not hard to expect cases of a 1000 or somewhat
> more active mounts.

Fair.  So at least 1000.  And there can be enough mounts that a limit of
100,000 might be necessary to give head room for the existings configurations.

Thank you.  Now I just need to wrap my head around fs/namespace.c again
and see how bad a count like that will be to maintain.

Eric

--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ian Kent Sept. 23, 2016, 4:26 a.m. UTC | #22
On Thu, 2016-09-22 at 20:37 -0500, Eric W. Biederman wrote:
> Ian Kent <raven@themaw.net> writes:
> 
> > On Thu, 2016-09-22 at 10:43 -0500, Eric W. Biederman wrote:
> > > Ian Kent <raven@themaw.net> writes:
> > > 
> > > > Eric, Mateusz, I appreciate your spending time on this and particularly
> > > > pointing
> > > > out my embarrassingly stupid is_local_mountpoint() usage mistake.
> > > > 
> > > > Please accept my apology for the inconvenience.
> > > > 
> > > > If all goes well (in testing) I'll have follow up patches to correct
> > > > this
> > > > fairly
> > > > soon.
> > > 
> > > Related question.  Do you happen to know how many mounts per mount
> > > namespace tend to be used?  It looks like it is going to be wise to put
> > > a configurable limit on that number.  And I would like the default to be
> > > something high enough most people don't care.  I believe autofs is
> > > likely where people tend to use the most mounts.

Yes, I agree, I did want to try and avoid changing the parameters to
->d_mamange() but passing a struct path pointer might be better in the long run
anyway.


> > That's a good question.
> > 
> > I've been thinking that maybe I should have used a lookup_mnt() type check
> > as I
> > originally started out to, for this reason, as the mnt_namespace list looks
> > to
> > be a linear list.
> > 
> > But there can be a lot of mounts, and not only due to autofs, so maybe that
> > should be considered anyway.
> 
> There are two reasons for is_local_mountpoint being the way it is.
> 
> a) For the cases where you don't have the parent mount.
> b) For the cases where you want to stop things if something is mounted
>    on a dentry in the local mount namespace even if it isn't mounted
>    on that dentry at your current mount parent. (Something that was
>    important to not change the semantics of the single mount namespace case).
> 
> Both of those cases to apply to unlink, rmdir, and rename.  I don't think
> either of those cases apply to what autofs is trying to do.  Certainly
> not the second.
> 
> So if you have the parent mount I really think you want to be using some
> variation of lookup_mnt().  The fact it is rcu safe may help with some
> of those weird corner cases as well.
> 
> > The number of mounts for direct mount maps is usually not very large because
> > of
> > the way they are implemented, large direct mount maps can have performance
> > problems. There can be anywhere from a few (likely case a few hundred) to
> > less
> > than 10000, plus mounts that have been triggered and not yet expired.
> > 
> > Indirect mounts have one autofs mount at the root plus the number of mounts
> > that
> > have been triggered and not yet expired.
> > 
> > The number of autofs indirect map entries can range from a few to the common
> > case of several thousand and in rare cases up to between 30000 and 50000.
> > I've
> > not heard of people with maps larger than 50000 entries.
> > 
> > The larger the number of map entries the greater the possibility for a large
> > number of active mounts so it's not hard to expect cases of a 1000 or
> > somewhat
> > more active mounts.
> 
> Fair.  So at least 1000.  And there can be enough mounts that a limit of
> 100,000 might be necessary to give head room for the existings configurations.
> 
> Thank you.  Now I just need to wrap my head around fs/namespace.c again
> and see how bad a count like that will be to maintain.
> 
> Eric
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ian Kent Sept. 23, 2016, noon UTC | #23
On Fri, 2016-09-23 at 12:26 +0800, Ian Kent wrote:
> On Thu, 2016-09-22 at 20:37 -0500, Eric W. Biederman wrote:
> > Ian Kent <raven@themaw.net> writes:
> > 
> > > On Thu, 2016-09-22 at 10:43 -0500, Eric W. Biederman wrote:
> > > > Ian Kent <raven@themaw.net> writes:
> > > > 
> > > > > Eric, Mateusz, I appreciate your spending time on this and
> > > > > particularly
> > > > > pointing
> > > > > out my embarrassingly stupid is_local_mountpoint() usage mistake.
> > > > > 
> > > > > Please accept my apology for the inconvenience.
> > > > > 
> > > > > If all goes well (in testing) I'll have follow up patches to correct
> > > > > this
> > > > > fairly
> > > > > soon.
> > > > 
> > > > Related question.  Do you happen to know how many mounts per mount
> > > > namespace tend to be used?  It looks like it is going to be wise to put
> > > > a configurable limit on that number.  And I would like the default to be
> > > > something high enough most people don't care.  I believe autofs is
> > > > likely where people tend to use the most mounts.
> 
> Yes, I agree, I did want to try and avoid changing the parameters to
> ->d_mamange() but passing a struct path pointer might be better in the long
> run
> anyway.
> 

Andrew, could you please drop patches for this series.

I believe they are:
fs-make-is_local_mountpoint-usable-by-others.patch
fs-add-have_local_submounts.patch
autofs-make-mountpoint-checks-namespace-aware.patch
fs-remove-unused-have_submounts-function.patch

I'm going to have a go at what Eric and I discussed above rather than update
this series.

Ian
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric W. Biederman Sept. 23, 2016, 7:15 p.m. UTC | #24
Ian Kent <raven@themaw.net> writes:

2> On Thu, 2016-09-22 at 20:37 -0500, Eric W. Biederman wrote:
>> Ian Kent <raven@themaw.net> writes:
>> 
>> > On Thu, 2016-09-22 at 10:43 -0500, Eric W. Biederman wrote:
>> > > Ian Kent <raven@themaw.net> writes:
>> > > 
>> > > > Eric, Mateusz, I appreciate your spending time on this and particularly
>> > > > pointing
>> > > > out my embarrassingly stupid is_local_mountpoint() usage mistake.
>> > > > 
>> > > > Please accept my apology for the inconvenience.
>> > > > 
>> > > > If all goes well (in testing) I'll have follow up patches to correct
>> > > > this
>> > > > fairly
>> > > > soon.
>> > > 
>> > > Related question.  Do you happen to know how many mounts per mount
>> > > namespace tend to be used?  It looks like it is going to be wise to put
>> > > a configurable limit on that number.  And I would like the default to be
>> > > something high enough most people don't care.  I believe autofs is
>> > > likely where people tend to use the most mounts.
>
> Yes, I agree, I did want to try and avoid changing the parameters to
> ->d_mamange() but passing a struct path pointer might be better in the long run
> anyway.

Given that there is exactly one implementation of d_manage in the tree I
don't imagine it will be disruptive to change that.

Eric
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ian Kent Sept. 24, 2016, 12:11 a.m. UTC | #25
On Fri, 2016-09-23 at 14:15 -0500, Eric W. Biederman wrote:
> Ian Kent <raven@themaw.net> writes:
> 
> 2> On Thu, 2016-09-22 at 20:37 -0500, Eric W. Biederman wrote:
> > > Ian Kent <raven@themaw.net> writes:
> > > 
> > > > On Thu, 2016-09-22 at 10:43 -0500, Eric W. Biederman wrote:
> > > > > Ian Kent <raven@themaw.net> writes:
> > > > > 
> > > > > > Eric, Mateusz, I appreciate your spending time on this and
> > > > > > particularly
> > > > > > pointing
> > > > > > out my embarrassingly stupid is_local_mountpoint() usage mistake.
> > > > > > 
> > > > > > Please accept my apology for the inconvenience.
> > > > > > 
> > > > > > If all goes well (in testing) I'll have follow up patches to correct
> > > > > > this
> > > > > > fairly
> > > > > > soon.
> > > > > 
> > > > > Related question.  Do you happen to know how many mounts per mount
> > > > > namespace tend to be used?  It looks like it is going to be wise to
> > > > > put
> > > > > a configurable limit on that number.  And I would like the default to
> > > > > be
> > > > > something high enough most people don't care.  I believe autofs is
> > > > > likely where people tend to use the most mounts.
> > 
> > Yes, I agree, I did want to try and avoid changing the parameters to
> > ->d_mamange() but passing a struct path pointer might be better in the long
> > run
> > anyway.
> 
> Given that there is exactly one implementation of d_manage in the tree I
> don't imagine it will be disruptive to change that.

Yes, but it could be used by external modules.

And there's also have_submounts().

I can update that using the existing d_walk() infrastructure or take it (mostly)
into the autofs module and get rid of have_submounts().

I'll go with the former to start with and see what people think.

> 
> Eric
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric W. Biederman Sept. 26, 2016, 4:05 p.m. UTC | #26
Ian Kent <raven@themaw.net> writes:

> On Fri, 2016-09-23 at 14:15 -0500, Eric W. Biederman wrote:
>> Ian Kent <raven@themaw.net> writes:
>> 
>> 2> On Thu, 2016-09-22 at 20:37 -0500, Eric W. Biederman wrote:
>> > > Ian Kent <raven@themaw.net> writes:
>> > > 
>> > > > On Thu, 2016-09-22 at 10:43 -0500, Eric W. Biederman wrote:
>> > > > > Ian Kent <raven@themaw.net> writes:
>> > > > > 
>> > > > > > Eric, Mateusz, I appreciate your spending time on this and
>> > > > > > particularly
>> > > > > > pointing
>> > > > > > out my embarrassingly stupid is_local_mountpoint() usage mistake.
>> > > > > > 
>> > > > > > Please accept my apology for the inconvenience.
>> > > > > > 
>> > > > > > If all goes well (in testing) I'll have follow up patches to correct
>> > > > > > this
>> > > > > > fairly
>> > > > > > soon.
>> > > > > 
>> > > > > Related question.  Do you happen to know how many mounts per mount
>> > > > > namespace tend to be used?  It looks like it is going to be wise to
>> > > > > put
>> > > > > a configurable limit on that number.  And I would like the default to
>> > > > > be
>> > > > > something high enough most people don't care.  I believe autofs is
>> > > > > likely where people tend to use the most mounts.
>> > 
>> > Yes, I agree, I did want to try and avoid changing the parameters to
>> > ->d_mamange() but passing a struct path pointer might be better in the long
>> > run
>> > anyway.
>> 
>> Given that there is exactly one implementation of d_manage in the tree I
>> don't imagine it will be disruptive to change that.
>
> Yes, but it could be used by external modules.
>
> And there's also have_submounts().

Good point about have_submounts.

> I can update that using the existing d_walk() infrastructure or take it (mostly)
> into the autofs module and get rid of have_submounts().
>
> I'll go with the former to start with and see what people think.

That will be interesting to so.  It is not clear to me that if d_walk
needs to be updated, and if d_walk doesn't need to be updated I would
be surprised to see it take into autofs.  But I am happy to look at the
end result and see what you come up with.

Eric
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ian Kent Sept. 27, 2016, 1:52 a.m. UTC | #27
On Mon, 2016-09-26 at 11:05 -0500, Eric W. Biederman wrote:
> Ian Kent <raven@themaw.net> writes:
> 
> > On Fri, 2016-09-23 at 14:15 -0500, Eric W. Biederman wrote:
> > > Ian Kent <raven@themaw.net> writes:
> > > 
> > > 2> On Thu, 2016-09-22 at 20:37 -0500, Eric W. Biederman wrote:
> > > > > Ian Kent <raven@themaw.net> writes:
> > > > > 
> > > > > > On Thu, 2016-09-22 at 10:43 -0500, Eric W. Biederman wrote:
> > > > > > > Ian Kent <raven@themaw.net> writes:
> > > > > > > 
> > > > > > > > Eric, Mateusz, I appreciate your spending time on this and
> > > > > > > > particularly
> > > > > > > > pointing
> > > > > > > > out my embarrassingly stupid is_local_mountpoint() usage
> > > > > > > > mistake.
> > > > > > > > 
> > > > > > > > Please accept my apology for the inconvenience.
> > > > > > > > 
> > > > > > > > If all goes well (in testing) I'll have follow up patches to
> > > > > > > > correct
> > > > > > > > this
> > > > > > > > fairly
> > > > > > > > soon.
> > > > > > > 
> > > > > > > Related question.  Do you happen to know how many mounts per mount
> > > > > > > namespace tend to be used?  It looks like it is going to be wise
> > > > > > > to
> > > > > > > put
> > > > > > > a configurable limit on that number.  And I would like the default
> > > > > > > to
> > > > > > > be
> > > > > > > something high enough most people don't care.  I believe autofs is
> > > > > > > likely where people tend to use the most mounts.
> > > > 
> > > > Yes, I agree, I did want to try and avoid changing the parameters to
> > > > ->d_mamange() but passing a struct path pointer might be better in the
> > > > long
> > > > run
> > > > anyway.
> > > 
> > > Given that there is exactly one implementation of d_manage in the tree I
> > > don't imagine it will be disruptive to change that.
> > 
> > Yes, but it could be used by external modules.
> > 
> > And there's also have_submounts().
> 
> Good point about have_submounts.
> 
> > I can update that using the existing d_walk() infrastructure or take it
> > (mostly)
> > into the autofs module and get rid of have_submounts().
> > 
> > I'll go with the former to start with and see what people think.
> 
> That will be interesting to so.  It is not clear to me that if d_walk
> needs to be updated, and if d_walk doesn't need to be updated I would
> be surprised to see it take into autofs.  But I am happy to look at the
> end result and see what you come up with.

I didn't mean d_walk() itself, just the have_submounts() function that's used
only by autofs these days. That's all I'll be changing.

To take this functionality into the autofs module shouldn't be a big deal as it
amounts to a directory traversal with a check at each node.

But I vaguely remember talk of wanting to get rid of have_submounts() and autofs
being the only remaining user.

So I mentioned it to try and elicit a comment, ;)

> 
> Eric
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric W. Biederman Sept. 27, 2016, 1:14 p.m. UTC | #28
Ian Kent <raven@themaw.net> writes:

> On Mon, 2016-09-26 at 11:05 -0500, Eric W. Biederman wrote:
>> Ian Kent <raven@themaw.net> writes:
>> 
>> > On Fri, 2016-09-23 at 14:15 -0500, Eric W. Biederman wrote:
>> > > Ian Kent <raven@themaw.net> writes:
>> > > 
>> > > 2> On Thu, 2016-09-22 at 20:37 -0500, Eric W. Biederman wrote:
>> > > > > Ian Kent <raven@themaw.net> writes:
>> > > > > 
>> > > > > > On Thu, 2016-09-22 at 10:43 -0500, Eric W. Biederman wrote:
>> > > > > > > Ian Kent <raven@themaw.net> writes:
>> > > > > > > 
>> > > > > > > > Eric, Mateusz, I appreciate your spending time on this and
>> > > > > > > > particularly
>> > > > > > > > pointing
>> > > > > > > > out my embarrassingly stupid is_local_mountpoint() usage
>> > > > > > > > mistake.
>> > > > > > > > 
>> > > > > > > > Please accept my apology for the inconvenience.
>> > > > > > > > 
>> > > > > > > > If all goes well (in testing) I'll have follow up patches to
>> > > > > > > > correct
>> > > > > > > > this
>> > > > > > > > fairly
>> > > > > > > > soon.
>> > > > > > > 
>> > > > > > > Related question.  Do you happen to know how many mounts per mount
>> > > > > > > namespace tend to be used?  It looks like it is going to be wise
>> > > > > > > to
>> > > > > > > put
>> > > > > > > a configurable limit on that number.  And I would like the default
>> > > > > > > to
>> > > > > > > be
>> > > > > > > something high enough most people don't care.  I believe autofs is
>> > > > > > > likely where people tend to use the most mounts.
>> > > > 
>> > > > Yes, I agree, I did want to try and avoid changing the parameters to
>> > > > ->d_mamange() but passing a struct path pointer might be better in the
>> > > > long
>> > > > run
>> > > > anyway.
>> > > 
>> > > Given that there is exactly one implementation of d_manage in the tree I
>> > > don't imagine it will be disruptive to change that.
>> > 
>> > Yes, but it could be used by external modules.
>> > 
>> > And there's also have_submounts().
>> 
>> Good point about have_submounts.
>> 
>> > I can update that using the existing d_walk() infrastructure or take it
>> > (mostly)
>> > into the autofs module and get rid of have_submounts().
>> > 
>> > I'll go with the former to start with and see what people think.
>> 
>> That will be interesting to so.  It is not clear to me that if d_walk
>> needs to be updated, and if d_walk doesn't need to be updated I would
>> be surprised to see it take into autofs.  But I am happy to look at the
>> end result and see what you come up with.
>
> I didn't mean d_walk() itself, just the have_submounts() function that's used
> only by autofs these days. That's all I'll be changing.
>
> To take this functionality into the autofs module shouldn't be a big deal as it
> amounts to a directory traversal with a check at each node.
>
> But I vaguely remember talk of wanting to get rid of have_submounts() and autofs
> being the only remaining user.
>
> So I mentioned it to try and elicit a comment, ;)

From my perspective the key detail is that d_walk is private to dcache.c

That said you want to look at may_umount_tree, or may_umount that
are already exported from fs/namespace.c, and already used by autofs.
One of those may already do the job you are trying to do.

It looks like at least may_umount needs to be examined to see if it does
the right thing for the tweaked semantics of autofs.

Eric
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ian Kent Sept. 28, 2016, 12:19 a.m. UTC | #29
On Tue, 2016-09-27 at 08:14 -0500, Eric W. Biederman wrote:
> Ian Kent <raven@themaw.net> writes:
> 
> > On Mon, 2016-09-26 at 11:05 -0500, Eric W. Biederman wrote:
> > > Ian Kent <raven@themaw.net> writes:
> > > 
> > > > On Fri, 2016-09-23 at 14:15 -0500, Eric W. Biederman wrote:
> > > > > Ian Kent <raven@themaw.net> writes:
> > > > > 
> > > > > 2> On Thu, 2016-09-22 at 20:37 -0500, Eric W. Biederman wrote:
> > > > > > > Ian Kent <raven@themaw.net> writes:
> > > > > > > 
> > > > > > > > On Thu, 2016-09-22 at 10:43 -0500, Eric W. Biederman wrote:
> > > > > > > > > Ian Kent <raven@themaw.net> writes:
> > > > > > > > > 
> > > > > > > > > > Eric, Mateusz, I appreciate your spending time on this and
> > > > > > > > > > particularly
> > > > > > > > > > pointing
> > > > > > > > > > out my embarrassingly stupid is_local_mountpoint() usage
> > > > > > > > > > mistake.
> > > > > > > > > > 
> > > > > > > > > > Please accept my apology for the inconvenience.
> > > > > > > > > > 
> > > > > > > > > > If all goes well (in testing) I'll have follow up patches to
> > > > > > > > > > correct
> > > > > > > > > > this
> > > > > > > > > > fairly
> > > > > > > > > > soon.
> > > > > > > > > 
> > > > > > > > > Related question.  Do you happen to know how many mounts per
> > > > > > > > > mount
> > > > > > > > > namespace tend to be used?  It looks like it is going to be
> > > > > > > > > wise
> > > > > > > > > to
> > > > > > > > > put
> > > > > > > > > a configurable limit on that number.  And I would like the
> > > > > > > > > default
> > > > > > > > > to
> > > > > > > > > be
> > > > > > > > > something high enough most people don't care.  I believe
> > > > > > > > > autofs is
> > > > > > > > > likely where people tend to use the most mounts.
> > > > > > 
> > > > > > Yes, I agree, I did want to try and avoid changing the parameters to
> > > > > > ->d_mamange() but passing a struct path pointer might be better in
> > > > > > the
> > > > > > long
> > > > > > run
> > > > > > anyway.
> > > > > 
> > > > > Given that there is exactly one implementation of d_manage in the tree
> > > > > I
> > > > > don't imagine it will be disruptive to change that.
> > > > 
> > > > Yes, but it could be used by external modules.
> > > > 
> > > > And there's also have_submounts().
> > > 
> > > Good point about have_submounts.
> > > 
> > > > I can update that using the existing d_walk() infrastructure or take it
> > > > (mostly)
> > > > into the autofs module and get rid of have_submounts().
> > > > 
> > > > I'll go with the former to start with and see what people think.
> > > 
> > > That will be interesting to so.  It is not clear to me that if d_walk
> > > needs to be updated, and if d_walk doesn't need to be updated I would
> > > be surprised to see it take into autofs.  But I am happy to look at the
> > > end result and see what you come up with.
> > 
> > I didn't mean d_walk() itself, just the have_submounts() function that's
> > used
> > only by autofs these days. That's all I'll be changing.
> > 
> > To take this functionality into the autofs module shouldn't be a big deal as
> > it
> > amounts to a directory traversal with a check at each node.
> > 
> > But I vaguely remember talk of wanting to get rid of have_submounts() and
> > autofs
> > being the only remaining user.
> > 
> > So I mentioned it to try and elicit a comment, ;)
> 
> From my perspective the key detail is that d_walk is private to dcache.c
> 
> That said you want to look at may_umount_tree, or may_umount that
> are already exported from fs/namespace.c, and already used by autofs.
> One of those may already do the job you are trying to do.

Right, I'm aware of them, autofs uses may_umount() when asking if an autofs
mount can be umounted, and it uses may_umount_tree() to check busyness in its
expire.

may_umount_tree() (and may_umount()) won't tell you if there are any mounts
within the directory tree, only that there is an elevated reference count, for
example an open file or working directory etc., ie. busy.

OTOH, have_submounts() will return true as soon as it encounters a
d_mountpoint() dentry in the directory tree which is what's needed where it's
used and why it can return a false positive for the problem case.

I get that a function, say, path_has_submounts() probably shouldn't be placed in
dcache.c and that's another reason to take the traversal into autofs. 

But if there's no word from Al on that I'd prefer to use d_walk() and put it
there.

Ian
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ian Kent Sept. 28, 2016, 10:18 a.m. UTC | #30
On Fri, 2016-09-23 at 20:00 +0800, Ian Kent wrote:
> On Fri, 2016-09-23 at 12:26 +0800, Ian Kent wrote:
> > On Thu, 2016-09-22 at 20:37 -0500, Eric W. Biederman wrote:
> > > Ian Kent <raven@themaw.net> writes:
> > > 
> > > > On Thu, 2016-09-22 at 10:43 -0500, Eric W. Biederman wrote:
> > > > > Ian Kent <raven@themaw.net> writes:
> > > > > 
> > > > > > Eric, Mateusz, I appreciate your spending time on this and
> > > > > > particularly
> > > > > > pointing
> > > > > > out my embarrassingly stupid is_local_mountpoint() usage mistake.
> > > > > > 
> > > > > > Please accept my apology for the inconvenience.
> > > > > > 
> > > > > > If all goes well (in testing) I'll have follow up patches to correct
> > > > > > this
> > > > > > fairly
> > > > > > soon.
> > > > > 
> > > > > Related question.  Do you happen to know how many mounts per mount
> > > > > namespace tend to be used?  It looks like it is going to be wise to
> > > > > put
> > > > > a configurable limit on that number.  And I would like the default to
> > > > > be
> > > > > something high enough most people don't care.  I believe autofs is
> > > > > likely where people tend to use the most mounts.
> > 
> > Yes, I agree, I did want to try and avoid changing the parameters to
> > ->d_mamange() but passing a struct path pointer might be better in the long
> > run
> > anyway.
> > 
> 
> Andrew, could you please drop patches for this series.
> 
> I believe they are:
> fs-make-is_local_mountpoint-usable-by-others.patch
> fs-add-have_local_submounts.patch
> autofs-make-mountpoint-checks-namespace-aware.patch
> fs-remove-unused-have_submounts-function.patch
> 
> I'm going to have a go at what Eric and I discussed above rather than update
> this series.

There are a couple of problems I see preventing me from posting an updated
series.

It looks like this series was dropped from the mmotm tree but is still present
in the linux-next tree.

Hopefully this won't be pushed to the Linux tree, it's not likely to be what's
needed.

Also there are two commits in the Linus tree that will conflict with an updated
series which is worth a heads up.

They are:

commit 8ac790f312 from Al Viro
title: qstr: constify instances in autofs4

commit e698b8a436 from Miklos Szeredi
title: vfs: document ->d_real()

Is there anything I should do to help with this?
Ian
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index c7fcc74..0024e25 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -564,7 +564,7 @@  static int autofs_dev_ioctl_ismountpoint(struct file *fp,
 
 		devid = new_encode_dev(dev);
 
-		err = have_submounts(path.dentry);
+		err = have_local_submounts(path.dentry);
 
 		if (follow_down_one(&path))
 			magic = path.dentry->d_sb->s_magic;
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index d8e6d42..7cc34ef 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -236,7 +236,7 @@  static int autofs4_tree_busy(struct vfsmount *mnt,
 		 * count for the autofs dentry.
 		 * If the fs is busy update the expiry counter.
 		 */
-		if (d_mountpoint(p)) {
+		if (is_local_mountpoint(p)) {
 			if (autofs4_mount_busy(mnt, p)) {
 				top_ino->last_used = jiffies;
 				dput(p);
@@ -280,7 +280,7 @@  static struct dentry *autofs4_check_leaves(struct vfsmount *mnt,
 	while ((p = get_next_positive_dentry(p, parent))) {
 		pr_debug("dentry %p %pd\n", p, p);
 
-		if (d_mountpoint(p)) {
+		if (is_local_mountpoint(p)) {
 			/* Can we umount this guy */
 			if (autofs4_mount_busy(mnt, p))
 				continue;
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index fa84bb8..4150ad6 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -123,7 +123,7 @@  static int autofs4_dir_open(struct inode *inode, struct file *file)
 	 * it.
 	 */
 	spin_lock(&sbi->lookup_lock);
-	if (!d_mountpoint(dentry) && simple_empty(dentry)) {
+	if (!is_local_mountpoint(dentry) && simple_empty(dentry)) {
 		spin_unlock(&sbi->lookup_lock);
 		return -ENOENT;
 	}
@@ -370,28 +370,28 @@  static struct vfsmount *autofs4_d_automount(struct path *path)
 
 	/*
 	 * If the dentry is a symlink it's equivalent to a directory
-	 * having d_mountpoint() true, so there's no need to call back
-	 * to the daemon.
+	 * having is_local_mountpoint() true, so there's no need to
+	 * call back to the daemon.
 	 */
 	if (d_really_is_positive(dentry) && d_is_symlink(dentry)) {
 		spin_unlock(&sbi->fs_lock);
 		goto done;
 	}
 
-	if (!d_mountpoint(dentry)) {
+	if (!is_local_mountpoint(dentry)) {
 		/*
 		 * It's possible that user space hasn't removed directories
 		 * after umounting a rootless multi-mount, although it
-		 * should. For v5 have_submounts() is sufficient to handle
-		 * this because the leaves of the directory tree under the
-		 * mount never trigger mounts themselves (they have an autofs
-		 * trigger mount mounted on them). But v4 pseudo direct mounts
-		 * do need the leaves to trigger mounts. In this case we
-		 * have no choice but to use the list_empty() check and
-		 * require user space behave.
+		 * should. For v5 have_local_submounts() is sufficient to
+		 * handle this because the leaves of the directory tree under
+		 * the mount never trigger mounts themselves (they have an
+		 * autofs trigger mount mounted on them). But v4 pseudo
+		 * direct mounts do need the leaves to trigger mounts. In
+		 * this case we have no choice but to use the list_empty()
+		 * check and require user space behave.
 		 */
 		if (sbi->version > 4) {
-			if (have_submounts(dentry)) {
+			if (have_local_submounts(dentry)) {
 				spin_unlock(&sbi->fs_lock);
 				goto done;
 			}
@@ -431,7 +431,7 @@  static int autofs4_d_manage(struct dentry *dentry, bool rcu_walk)
 
 	/* The daemon never waits. */
 	if (autofs4_oz_mode(sbi)) {
-		if (!d_mountpoint(dentry))
+		if (!is_local_mountpoint(dentry))
 			return -EISDIR;
 		return 0;
 	}
@@ -460,7 +460,7 @@  static int autofs4_d_manage(struct dentry *dentry, bool rcu_walk)
 
 		if (ino->flags & AUTOFS_INF_WANT_EXPIRE)
 			return 0;
-		if (d_mountpoint(dentry))
+		if (is_local_mountpoint(dentry))
 			return 0;
 		inode = d_inode_rcu(dentry);
 		if (inode && S_ISLNK(inode->i_mode))
@@ -487,7 +487,7 @@  static int autofs4_d_manage(struct dentry *dentry, bool rcu_walk)
 		 * we can avoid needless calls ->d_automount() and avoid
 		 * an incorrect ELOOP error return.
 		 */
-		if ((!d_mountpoint(dentry) && !simple_empty(dentry)) ||
+		if ((!is_local_mountpoint(dentry) && !simple_empty(dentry)) ||
 		    (d_really_is_positive(dentry) && d_is_symlink(dentry)))
 			status = -EISDIR;
 	}
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 431fd7e..911f4d5 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -333,7 +333,7 @@  static int validate_request(struct autofs_wait_queue **wait,
 					dentry = new;
 			}
 		}
-		if (have_submounts(dentry))
+		if (have_local_submounts(dentry))
 			valid = 0;
 
 		if (new)