
mnt: Protect the mountpoint hashtable with mount_lock

Message ID 87shoz32g8.fsf_-_@xmission.com (mailing list archive)
State New, archived

Commit Message

Eric W. Biederman Jan. 4, 2017, 3:53 a.m. UTC
Protecting the mountpoint hashtable with namespace_sem was sufficient
until a call to umount_mnt was added to mntput_no_expire.  At that
point it became possible for multiple calls to put_mountpoint on
the same hash chain to happen at the same time.

Krister Johansen <kjlx@templeofstupid.com> reported:
> This can cause a panic when simultaneous callers of put_mountpoint
> attempt to free the same mountpoint.  This occurs because some callers
> hold the mount_hash_lock, while others hold the namespace lock.  Some
> even hold both.
>
> In this submitter's case, the panic manifested itself as a GP fault in
> put_mountpoint() when it called hlist_del() and attempted to dereference
> a m_hash.pprev that had been poisoned by another thread.
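
To make the hazard concrete, here is a rough userspace sketch (mp_node,
put_mp and chain_lock are invented names for illustration; this is not
kernel code).  The unhash-and-free step of a refcounted node must be
serialized by a single lock; before this patch the put_mountpoint paths
were effectively guarded by different locks (namespace_sem in one path,
the mount hash lock in another, as in the report above), which is what
let two threads reach hlist_del() on the same node.

/*
 * Sketch only -- a userspace analogue, not kernel code.  mp_node,
 * put_mp and chain_lock are invented names.
 */
#include <pthread.h>
#include <stdlib.h>

struct mp_node {
	struct mp_node *next;		/* stand-in for the m_hash chain linkage */
	int refcount;			/* stand-in for mp->m_count */
};

static struct mp_node *chain;		/* one hash chain */
static pthread_mutex_t chain_lock = PTHREAD_MUTEX_INITIALIZER;

/* Drop a reference; the last one unhashes and frees the node. */
static void put_mp(struct mp_node *mp)
{
	/*
	 * Every caller must take the SAME lock here.  The pre-patch bug
	 * was equivalent to one thread taking chain_lock and another
	 * taking a different mutex: both could then race on the refcount
	 * and unlink/free the same node twice.
	 */
	pthread_mutex_lock(&chain_lock);
	if (--mp->refcount == 0) {
		struct mp_node **p = &chain;

		while (*p && *p != mp)
			p = &(*p)->next;
		if (*p)
			*p = mp->next;	/* unhash, analogous to hlist_del() */
		free(mp);
	}
	pthread_mutex_unlock(&chain_lock);
}

static void *dropper(void *arg)
{
	put_mp(arg);
	return NULL;
}

int main(void)
{
	struct mp_node *mp = calloc(1, sizeof(*mp));
	pthread_t a, b;

	mp->refcount = 2;		/* two owners will drop their references */
	mp->next = chain;
	chain = mp;

	pthread_create(&a, NULL, dropper, mp);
	pthread_create(&b, NULL, dropper, mp);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	return 0;
}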

Al Viro observed that the simple fix is to switch from using the namespace_sem
to the mount_lock to protect the mountpoint hash table.

I have taken Al's suggested patch, moved put_mountpoint in pivot_root
(instead of taking mount_lock an additional time), and replaced
new_mountpoint with get_mountpoint, a function that does the hash table
lookup and addition under the mount_lock.  The introduction of get_mountpoint
ensures that only the mount_lock is needed to manipulate the mountpoint
hashtable.
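
The shape of that helper, shown here as a rough userspace sketch rather
than the kernel code (get_node, lookup_node and table_lock are invented
names; the real function is in the fs/namespace.c hunk below): the
lookup and the insertion both happen under one lock, while the
allocation stays outside it.  The sketch simply re-checks the table
under the lock, whereas the kernel code uses the DCACHE_MOUNTED flag,
described next, as the exactly-once arbiter.

/* Sketch only; get_node(), lookup_node() and table_lock are invented names. */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct node {
	struct node *next;
	char key[32];
};

static struct node *table;		/* one chain standing in for the hash table */
static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;

/* Caller must hold table_lock, like lookup_mountpoint() under mount_lock. */
static struct node *lookup_node(const char *key)
{
	struct node *n;

	for (n = table; n; n = n->next)
		if (!strcmp(n->key, key))
			return n;
	return NULL;
}

/*
 * Return the existing entry for @key, or add one.  Lookup and insertion
 * are both done under table_lock; the allocation is done outside it.
 */
static struct node *get_node(const char *key)
{
	struct node *n, *new = NULL;

	pthread_mutex_lock(&table_lock);
	n = lookup_node(key);
	pthread_mutex_unlock(&table_lock);
	if (n)
		return n;

	new = calloc(1, sizeof(*new));
	if (!new)
		return NULL;
	strncpy(new->key, key, sizeof(new->key) - 1);

	pthread_mutex_lock(&table_lock);
	n = lookup_node(key);		/* did another thread beat us to it? */
	if (!n) {
		new->next = table;
		table = new;
		n = new;
		new = NULL;		/* ownership transferred to the table */
	}
	pthread_mutex_unlock(&table_lock);

	free(new);			/* the loser's allocation, or NULL */
	return n;
}

int main(void)
{
	printf("%p %p\n", (void *)get_node("a"), (void *)get_node("a"));
	return 0;
}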

d_set_mounted is modified to only set DCACHE_MOUNTED if it is not
already set.  This allows get_mountpoint to use the setting of
DCACHE_MOUNTED to ensure adding a struct mountpoint for a dentry
happens exactly once.
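
Roughly, that exactly-once test-and-set looks like the following
userspace sketch (obj, set_claimed and FLAG_CLAIMED are invented names;
the real change is in the fs/dcache.c hunk below): the flag is tested
and set under the object's own lock, a single winner gets 0, and every
other caller gets -EBUSY and goes back to look up the entry the winner
inserted.

/* Sketch only; obj, set_claimed() and FLAG_CLAIMED are invented names. */
#include <errno.h>
#include <pthread.h>
#include <stdio.h>

#define FLAG_CLAIMED	0x1		/* stand-in for DCACHE_MOUNTED */

struct obj {
	pthread_mutex_t lock;		/* stand-in for dentry->d_lock */
	unsigned int flags;
};

/*
 * Set FLAG_CLAIMED exactly once.  Returns 0 to the single winner and
 * -EBUSY to everyone else, mirroring the modified d_set_mounted(): an
 * -EBUSY caller knows an entry already exists and looks it up in the
 * hash table instead of inserting a duplicate.
 */
static int set_claimed(struct obj *o)
{
	int ret = -EBUSY;

	pthread_mutex_lock(&o->lock);
	if (!(o->flags & FLAG_CLAIMED)) {
		o->flags |= FLAG_CLAIMED;
		ret = 0;
	}
	pthread_mutex_unlock(&o->lock);
	return ret;
}

int main(void)
{
	struct obj o = { .flags = 0 };
	int first, second;

	pthread_mutex_init(&o.lock, NULL);
	first = set_claimed(&o);
	second = set_claimed(&o);
	printf("first: %d, second: %d\n", first, second);	/* 0, then -EBUSY */
	return 0;
}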

Cc: stable@vger.kernel.org
Fixes: ce07d891a089 ("mnt: Honor MNT_LOCKED when detaching mounts")
Reported-by: Krister Johansen <kjlx@templeofstupid.com>
Suggested-by: Al Viro <viro@ZenIV.linux.org.uk>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 fs/dcache.c    |  7 +++++--
 fs/namespace.c | 64 +++++++++++++++++++++++++++++++++++++++++-----------------
 2 files changed, 50 insertions(+), 21 deletions(-)

Comments

Krister Johansen Jan. 6, 2017, 7 a.m. UTC | #1
On Wed, Jan 04, 2017 at 04:53:59PM +1300, Eric W. Biederman wrote:
> 
> Protecting the mountpoint hashtable with namespace_sem was sufficient
> until a call to umount_mnt was added to mntput_no_expire.  At that
> point it became possible for multiple calls to put_mountpoint on
> the same hash chain to happen at the same time.
> 
> Krister Johansen <kjlx@templeofstupid.com> reported:
> > This can cause a panic when simultaneous callers of put_mountpoint
> > attempt to free the same mountpoint.  This occurs because some callers
> > hold the mount_hash_lock, while others hold the namespace lock.  Some
> > even hold both.
> >
> > In this submitter's case, the panic manifested itself as a GP fault in
> > put_mountpoint() when it called hlist_del() and attempted to dereference
> > a m_hash.pprev that had been poisoned by another thread.
> 
> Al Viro observed that the simple fix is to switch from using the namespace_sem
> to the mount_lock to protect the mountpoint hash table.
> 
> I have taken Al's suggested patch, moved put_mountpoint in pivot_root
> (instead of taking mount_lock an additional time), and replaced
> new_mountpoint with get_mountpoint, a function that does the hash table
> lookup and addition under the mount_lock.  The introduction of get_mountpoint
> ensures that only the mount_lock is needed to manipulate the mountpoint
> hashtable.
> 
> d_set_mounted is modified to only set DCACHE_MOUNTED if it is not
> already set.  This allows get_mountpoint to use the setting of
> DCACHE_MOUNTED to ensure adding a struct mountpoint for a dentry
> happens exactly once.
> 
> Cc: stable@vger.kernel.org
> Fixes: ce07d891a089 ("mnt: Honor MNT_LOCKED when detaching mounts")
> Reported-by: Krister Johansen <kjlx@templeofstupid.com>
> Suggested-by: Al Viro <viro@ZenIV.linux.org.uk>
> Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
> ---

Sorry for the slow reply.

This looks right to me.  I just pulled in the patch and went through all
of the code paths in cscope.  Everything is now under the mount_lock,
which solves the problem from my perspective.  Feel free to put me down
as a reviewed-by if my vote counts.

There's another issue with MNT_LOCKED and detached mounts that I've been
investigating.  I'd be curious to get your opinion before I write any
code.  I'll send that out in a separate e-mail, though.

-K

Patch

diff --git a/fs/dcache.c b/fs/dcache.c
index 769903dbc19d..95d71eda8142 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1336,8 +1336,11 @@  int d_set_mounted(struct dentry *dentry)
 	}
 	spin_lock(&dentry->d_lock);
 	if (!d_unlinked(dentry)) {
-		dentry->d_flags |= DCACHE_MOUNTED;
-		ret = 0;
+		ret = -EBUSY;
+		if (!d_mountpoint(dentry)) {
+			dentry->d_flags |= DCACHE_MOUNTED;
+			ret = 0;
+		}
 	}
  	spin_unlock(&dentry->d_lock);
 out:
diff --git a/fs/namespace.c b/fs/namespace.c
index b5b1259e064f..487ba30bb5c6 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -742,26 +742,50 @@  static struct mountpoint *lookup_mountpoint(struct dentry *dentry)
 	return NULL;
 }
 
-static struct mountpoint *new_mountpoint(struct dentry *dentry)
+static struct mountpoint *get_mountpoint(struct dentry *dentry)
 {
-	struct hlist_head *chain = mp_hash(dentry);
-	struct mountpoint *mp;
+	struct mountpoint *mp, *new = NULL;
 	int ret;
 
-	mp = kmalloc(sizeof(struct mountpoint), GFP_KERNEL);
-	if (!mp)
+	if (d_mountpoint(dentry)) {
+mountpoint:
+		read_seqlock_excl(&mount_lock);
+		mp = lookup_mountpoint(dentry);
+		read_sequnlock_excl(&mount_lock);
+		if (mp)
+			goto done;
+	}
+
+	if (!new)
+		new = kmalloc(sizeof(struct mountpoint), GFP_KERNEL);
+	if (!new)
 		return ERR_PTR(-ENOMEM);
 
+
+	/* Exactly one process may set d_mounted */
 	ret = d_set_mounted(dentry);
-	if (ret) {
-		kfree(mp);
-		return ERR_PTR(ret);
-	}
 
-	mp->m_dentry = dentry;
-	mp->m_count = 1;
-	hlist_add_head(&mp->m_hash, chain);
-	INIT_HLIST_HEAD(&mp->m_list);
+	/* Someone else set d_mounted? */
+	if (ret == -EBUSY)
+		goto mountpoint;
+
+	/* The dentry is not available as a mountpoint? */
+	mp = ERR_PTR(ret);
+	if (ret)
+		goto done;
+
+	/* Add the new mountpoint to the hash table */
+	read_seqlock_excl(&mount_lock);
+	new->m_dentry = dentry;
+	new->m_count = 1;
+	hlist_add_head(&new->m_hash, mp_hash(dentry));
+	INIT_HLIST_HEAD(&new->m_list);
+	read_sequnlock_excl(&mount_lock);
+
+	mp = new;
+	new = NULL;
+done:
+	kfree(new);
 	return mp;
 }
 
@@ -1595,11 +1619,11 @@  void __detach_mounts(struct dentry *dentry)
 	struct mount *mnt;
 
 	namespace_lock();
+	lock_mount_hash();
 	mp = lookup_mountpoint(dentry);
 	if (IS_ERR_OR_NULL(mp))
 		goto out_unlock;
 
-	lock_mount_hash();
 	event++;
 	while (!hlist_empty(&mp->m_list)) {
 		mnt = hlist_entry(mp->m_list.first, struct mount, mnt_mp_list);
@@ -1609,9 +1633,9 @@  void __detach_mounts(struct dentry *dentry)
 		}
 		else umount_tree(mnt, UMOUNT_CONNECTED);
 	}
-	unlock_mount_hash();
 	put_mountpoint(mp);
 out_unlock:
+	unlock_mount_hash();
 	namespace_unlock();
 }
 
@@ -2038,9 +2062,7 @@  static struct mountpoint *lock_mount(struct path *path)
 	namespace_lock();
 	mnt = lookup_mnt(path);
 	if (likely(!mnt)) {
-		struct mountpoint *mp = lookup_mountpoint(dentry);
-		if (!mp)
-			mp = new_mountpoint(dentry);
+		struct mountpoint *mp = get_mountpoint(dentry);
 		if (IS_ERR(mp)) {
 			namespace_unlock();
 			inode_unlock(dentry->d_inode);
@@ -2059,7 +2081,11 @@  static struct mountpoint *lock_mount(struct path *path)
 static void unlock_mount(struct mountpoint *where)
 {
 	struct dentry *dentry = where->m_dentry;
+
+	read_seqlock_excl(&mount_lock);
 	put_mountpoint(where);
+	read_sequnlock_excl(&mount_lock);
+
 	namespace_unlock();
 	inode_unlock(dentry->d_inode);
 }
@@ -3135,9 +3161,9 @@  SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
 	touch_mnt_namespace(current->nsproxy->mnt_ns);
 	/* A moved mount should not expire automatically */
 	list_del_init(&new_mnt->mnt_expire);
+	put_mountpoint(root_mp);
 	unlock_mount_hash();
 	chroot_fs_refs(&root, &new);
-	put_mountpoint(root_mp);
 	error = 0;
 out4:
 	unlock_mount(old_mp);