diff mbox

INFO: task hung in iterate_supers

Message ID 7dc64116-f43d-3739-7b35-4eef7a80e343@I-love.SAKURA.ne.jp (mailing list archive)
State New, archived
Headers show

Commit Message

Tetsuo Handa July 11, 2018, 10:19 a.m. UTC
On 2018/07/10 19:34, Tetsuo Handa wrote:
> It turned out that, although the reason of stalling v9fs_mount() is currently
> unknown, the reason of many processes stuck at iterate_supers() is that
> they are unable to take s->s_umount object due to down_write_nested() below.
> 
> 	/*
> 	 * sget() can have s_umount recursion.
> 	 *
> 	 * When it cannot find a suitable sb, it allocates a new
> 	 * one (this one), and tries again to find a suitable old
> 	 * one.
> 	 *
> 	 * In case that succeeds, it will acquire the s_umount
> 	 * lock of the old one. Since these are clearly distrinct
> 	 * locks, and this object isn't exposed yet, there's no
> 	 * risk of deadlocks.
> 	 *
> 	 * Annotate this by putting this lock in a different
> 	 * subclass.
> 	 */
> 	down_write_nested(&s->s_umount, SINGLE_DEPTH_NESTING);
> 

Guessing from commit dabe0dc194d5d56d ("vfs: fix the rest of sget() races"),
this is an overlooked sget() race.

iterate_supers() is calling down_read(&sb_s_umount) on all superblocks found by
traversing super_blocks list. But sget_userns() adds "newly created superblock
to super_blocks list with ->s_umount held by alloc_super() for write" before
"mount_fs() sets SB_BORN flag after returning from type->mount()".

Therefore, if type->mount() (e.g. v9fs_mount()) took long after sget() for
some reason, processes will be blocked on a superblock on SB_BORN not yet set.
Doing the test (which is done after ->s_umount is held) before holding ->s_umount
(patch shown below) works for me. Although there seems to be many bugs in v9fs
code, I think that making sure that other threads won't try to wait for superblock
with SB_BORN not yet set is better.

---
 fs/super.c | 31 ++++++++++++++++++++++---------
 1 file changed, 22 insertions(+), 9 deletions(-)
diff mbox

Patch

diff --git a/fs/super.c b/fs/super.c
index 50728d9..03c3b3f 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -37,6 +37,11 @@ 
 #include <linux/user_namespace.h>
 #include "internal.h"
 
+static inline bool sb_is_alive(struct super_block *sb)
+{
+	return sb->s_root && (sb->s_flags & SB_BORN);
+}
+
 static int thaw_super_locked(struct super_block *sb);
 
 static LIST_HEAD(super_blocks);
@@ -407,8 +412,7 @@  static int grab_super(struct super_block *s) __releases(sb_lock)
 bool trylock_super(struct super_block *sb)
 {
 	if (down_read_trylock(&sb->s_umount)) {
-		if (!hlist_unhashed(&sb->s_instances) &&
-		    sb->s_root && (sb->s_flags & SB_BORN))
+		if (!hlist_unhashed(&sb->s_instances) && sb_is_alive(sb))
 			return true;
 		up_read(&sb->s_umount);
 	}
@@ -590,6 +594,8 @@  static void __iterate_supers(void (*f)(struct super_block *))
 
 	spin_lock(&sb_lock);
 	list_for_each_entry(sb, &super_blocks, s_list) {
+		if (!sb_is_alive(sb))
+			continue;
 		if (hlist_unhashed(&sb->s_instances))
 			continue;
 		sb->s_count++;
@@ -620,13 +626,15 @@  void iterate_supers(void (*f)(struct super_block *, void *), void *arg)
 
 	spin_lock(&sb_lock);
 	list_for_each_entry(sb, &super_blocks, s_list) {
+		if (!sb_is_alive(sb))
+			continue;
 		if (hlist_unhashed(&sb->s_instances))
 			continue;
 		sb->s_count++;
 		spin_unlock(&sb_lock);
 
 		down_read(&sb->s_umount);
-		if (sb->s_root && (sb->s_flags & SB_BORN))
+		if (sb_is_alive(sb))
 			f(sb, arg);
 		up_read(&sb->s_umount);
 
@@ -656,11 +664,13 @@  void iterate_supers_type(struct file_system_type *type,
 
 	spin_lock(&sb_lock);
 	hlist_for_each_entry(sb, &type->fs_supers, s_instances) {
+		if (!sb_is_alive(sb))
+			continue;
 		sb->s_count++;
 		spin_unlock(&sb_lock);
 
 		down_read(&sb->s_umount);
-		if (sb->s_root && (sb->s_flags & SB_BORN))
+		if (sb_is_alive(sb))
 			f(sb, arg);
 		up_read(&sb->s_umount);
 
@@ -689,6 +699,8 @@  static struct super_block *__get_super(struct block_device *bdev, bool excl)
 		if (hlist_unhashed(&sb->s_instances))
 			continue;
 		if (sb->s_bdev == bdev) {
+			if (!sb_is_alive(sb))
+				continue;
 			sb->s_count++;
 			spin_unlock(&sb_lock);
 			if (!excl)
@@ -696,7 +708,7 @@  static struct super_block *__get_super(struct block_device *bdev, bool excl)
 			else
 				down_write(&sb->s_umount);
 			/* still alive? */
-			if (sb->s_root && (sb->s_flags & SB_BORN))
+			if (sb_is_alive(sb))
 				return sb;
 			if (!excl)
 				up_read(&sb->s_umount);
@@ -813,11 +825,13 @@  struct super_block *user_get_super(dev_t dev)
 		if (hlist_unhashed(&sb->s_instances))
 			continue;
 		if (sb->s_dev ==  dev) {
+			if (!sb_is_alive(sb))
+				continue;
 			sb->s_count++;
 			spin_unlock(&sb_lock);
 			down_read(&sb->s_umount);
 			/* still alive? */
-			if (sb->s_root && (sb->s_flags & SB_BORN))
+			if (sb_is_alive(sb))
 				return sb;
 			up_read(&sb->s_umount);
 			/* nope, got unmounted */
@@ -916,8 +930,7 @@  int do_remount_sb(struct super_block *sb, int sb_flags, void *data, int force)
 static void do_emergency_remount_callback(struct super_block *sb)
 {
 	down_write(&sb->s_umount);
-	if (sb->s_root && sb->s_bdev && (sb->s_flags & SB_BORN) &&
-	    !sb_rdonly(sb)) {
+	if (sb_is_alive(sb) && sb->s_bdev && !sb_rdonly(sb)) {
 		/*
 		 * What lock protects sb->s_flags??
 		 */
@@ -947,7 +960,7 @@  void emergency_remount(void)
 static void do_thaw_all_callback(struct super_block *sb)
 {
 	down_write(&sb->s_umount);
-	if (sb->s_root && sb->s_flags & SB_BORN) {
+	if (sb_is_alive(sb)) {
 		emergency_thaw_bdev(sb);
 		thaw_super_locked(sb);
 	} else {