[7/7] shmem: fix quota lock nesting in huge hole handling

Message ID: 20230725144510.253763-8-cem@kernel.org
State: New, archived
Series: shmem: Add user and group quota support for tmpfs

Commit Message

Carlos Maiolino July 25, 2023, 2:45 p.m. UTC
From: Hugh Dickins <hughd@google.com>

i_pages lock nests inside i_lock, but shmem_charge() and shmem_uncharge()
were being called from THP splitting or collapsing while i_pages lock was
held, and now go on to call dquot_alloc_block_nodirty() which takes
i_lock to update i_blocks.
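
To illustrate the inversion (a rough sketch of one offending chain, with
collapse_file() standing in for the THP collapse path; not an exact trace):

  collapse_file()                      /* mm/khugepaged.c, before this patch */
    xas_lock_irq(&xas)                 /* takes the i_pages lock */
      shmem_charge(mapping->host, 1)
        shmem_inode_acct_block()
          dquot_alloc_block_nodirty()
            __dquot_alloc_space()
              inode_add_bytes()        /* spin_lock(&inode->i_lock) */

Since the established order has the i_pages lock nest inside i_lock, taking
i_lock here, under the i_pages lock, is the inversion lockdep reports.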

We may well want to take i_lock out of this path later, in the non-quota
case even if it's left in the quota case (or perhaps use i_lock instead
of shmem's info->lock throughout); but don't get into that at this time.

Move the shmem_charge() and shmem_uncharge() calls out from under i_pages
lock, accounting the full batch of holes in a single call.
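
Condensed from the __split_huge_page() hunk below, the new shape is
(a sketch, details elided):

    int nr_dropped = 0;

    /* under the i_pages lock: only count the shmem holes */
    if (shmem_mapping(head->mapping))
        nr_dropped++;
    ...
    /* after the i_pages lock is dropped and irqs re-enabled: */
    if (nr_dropped)
        shmem_uncharge(head->mapping->host, nr_dropped);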

Still pass the pages argument to shmem_uncharge(), though it now goes
unused: shmem_recalc_inode() is designed to account for clean pages
freed behind shmem's back, so it gets the accounting right by itself;
the later call to shmem_inode_unacct_blocks() then unaccounted the same
blocks a second time, leading to imbalance (and tripping that
WARN_ON(inode->i_blocks) in shmem_evict_inode()).
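
For context, shmem_recalc_inode() works along these lines (a simplified
sketch of mm/shmem.c as of this series, with the i_blocks bookkeeping
folded into shmem_inode_unacct_blocks()):

    static void shmem_recalc_inode(struct inode *inode)
    {
        struct shmem_inode_info *info = SHMEM_I(inode);
        long freed;

        /*
         * Pages still accounted but gone from the mapping were
         * freed behind shmem's back: settle the difference.
         */
        freed = info->alloced - info->swapped - inode->i_mapping->nrpages;
        if (freed > 0) {
            info->alloced -= freed;
            shmem_inode_unacct_blocks(inode, freed);
        }
    }

Once the holes have left the mapping, then, the recalculation alone
settles the block count, which is why the explicit unaccounting call in
shmem_uncharge() had to go.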

Reported-by: syzbot+38ca19393fb3344f57e6@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/lkml/0000000000008e62f40600bfe080@google.com/
Reported-by: syzbot+440ff8cca06ee7a1d4db@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/lkml/00000000000076a7840600bfb6e8@google.com/
Signed-off-by: Hugh Dickins <hughd@google.com>
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
Tested-by: Carlos Maiolino <cmaiolino@redhat.com>
---
 mm/huge_memory.c |  6 ++++--
 mm/khugepaged.c  | 13 +++++++------
 mm/shmem.c       | 19 +++++++++----------
 3 files changed, 20 insertions(+), 18 deletions(-)

Patch

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index eb3678360b97..d301c323c69a 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2521,7 +2521,7 @@  static void __split_huge_page(struct page *page, struct list_head *list,
 	struct address_space *swap_cache = NULL;
 	unsigned long offset = 0;
 	unsigned int nr = thp_nr_pages(head);
-	int i;
+	int i, nr_dropped = 0;
 
 	/* complete memcg works before add pages to LRU */
 	split_page_memcg(head, nr);
@@ -2546,7 +2546,7 @@  static void __split_huge_page(struct page *page, struct list_head *list,
 			struct folio *tail = page_folio(head + i);
 
 			if (shmem_mapping(head->mapping))
-				shmem_uncharge(head->mapping->host, 1);
+				nr_dropped++;
 			else if (folio_test_clear_dirty(tail))
 				folio_account_cleaned(tail,
 					inode_to_wb(folio->mapping->host));
@@ -2583,6 +2583,8 @@  static void __split_huge_page(struct page *page, struct list_head *list,
 	}
 	local_irq_enable();
 
+	if (nr_dropped)
+		shmem_uncharge(head->mapping->host, nr_dropped);
 	remap_page(folio, nr);
 
 	if (PageSwapCache(head)) {
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 78c8d5d8b628..47d1d32c734f 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1955,10 +1955,6 @@  static int collapse_file(struct mm_struct *mm, unsigned long addr,
 						goto xa_locked;
 					}
 				}
-				if (!shmem_charge(mapping->host, 1)) {
-					result = SCAN_FAIL;
-					goto xa_locked;
-				}
 				nr_none++;
 				continue;
 			}
@@ -2145,8 +2141,13 @@  static int collapse_file(struct mm_struct *mm, unsigned long addr,
 	 */
 	try_to_unmap_flush();
 
-	if (result != SCAN_SUCCEED)
+	if (result == SCAN_SUCCEED && nr_none &&
+	    !shmem_charge(mapping->host, nr_none))
+		result = SCAN_FAIL;
+	if (result != SCAN_SUCCEED) {
+		nr_none = 0;
 		goto rollback;
+	}
 
 	/*
 	 * The old pages are locked, so they won't change anymore.
@@ -2283,8 +2284,8 @@  static int collapse_file(struct mm_struct *mm, unsigned long addr,
 	if (nr_none) {
 		xas_lock_irq(&xas);
 		mapping->nrpages -= nr_none;
-		shmem_uncharge(mapping->host, nr_none);
 		xas_unlock_irq(&xas);
+		shmem_uncharge(mapping->host, nr_none);
 	}
 
 	list_for_each_entry_safe(page, tmp, &pagelist, lru) {
diff --git a/mm/shmem.c b/mm/shmem.c
index bd02909bacd6..5f83c18abc45 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -424,18 +424,20 @@  static void shmem_recalc_inode(struct inode *inode)
 bool shmem_charge(struct inode *inode, long pages)
 {
 	struct shmem_inode_info *info = SHMEM_I(inode);
-	unsigned long flags;
+	struct address_space *mapping = inode->i_mapping;
 
 	if (shmem_inode_acct_block(inode, pages))
 		return false;
 
 	/* nrpages adjustment first, then shmem_recalc_inode() when balanced */
-	inode->i_mapping->nrpages += pages;
+	xa_lock_irq(&mapping->i_pages);
+	mapping->nrpages += pages;
+	xa_unlock_irq(&mapping->i_pages);
 
-	spin_lock_irqsave(&info->lock, flags);
+	spin_lock_irq(&info->lock);
 	info->alloced += pages;
 	shmem_recalc_inode(inode);
-	spin_unlock_irqrestore(&info->lock, flags);
+	spin_unlock_irq(&info->lock);
 
 	return true;
 }
@@ -443,16 +445,13 @@  bool shmem_charge(struct inode *inode, long pages)
 void shmem_uncharge(struct inode *inode, long pages)
 {
 	struct shmem_inode_info *info = SHMEM_I(inode);
-	unsigned long flags;
 
 	/* nrpages adjustment done by __filemap_remove_folio() or caller */
 
-	spin_lock_irqsave(&info->lock, flags);
-	info->alloced -= pages;
+	spin_lock_irq(&info->lock);
 	shmem_recalc_inode(inode);
-	spin_unlock_irqrestore(&info->lock, flags);
-
-	shmem_inode_unacct_blocks(inode, pages);
+	/* which has called shmem_inode_unacct_blocks() if necessary */
+	spin_unlock_irq(&info->lock);
 }
 
 /*