fs/netfs/fscache_io: remove the obsolete "using_pgpriv2" flag

Message ID 20240729091532.855688-1-max.kellermann@ionos.com (mailing list archive)
State New
Series fs/netfs/fscache_io: remove the obsolete "using_pgpriv2" flag

Commit Message

Max Kellermann July 29, 2024, 9:15 a.m. UTC
This fixes a crash bug caused by commit ae678317b95e ("netfs: Remove
deprecated use of PG_private_2 as a second writeback flag") by removing
a leftover folio_end_private_2() call that remained after all calls to
folio_start_private_2() had been removed by that commit.

Calling folio_end_private_2() without a preceding
folio_start_private_2() unbalances the folio reference count, causing
problems such as RCU stalls and general protection faults.

Signed-off-by: Max Kellermann <max.kellermann@ionos.com>
Fixes: ae678317b95e ("netfs: Remove deprecated use of PG_private_2 as a second writeback flag")
Link: https://lore.kernel.org/ceph-devel/CAKPOu+_DA8XiMAA2ApMj7Pyshve_YWknw8Hdt1=zCy9Y87R1qw@mail.gmail.com/
Signed-off-by: Max Kellermann <max.kellermann@ionos.com>
---
 fs/ceph/addr.c          |  2 +-
 fs/netfs/fscache_io.c   | 29 +----------------------------
 include/linux/fscache.h | 30 ++++--------------------------
 3 files changed, 6 insertions(+), 55 deletions(-)
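
For context, folio_start_private_2() and folio_end_private_2() are meant
to be used as a strict pair: the start helper takes an extra reference on
the folio and sets PG_private_2, and the end helper clears the flag, wakes
any waiters and drops that reference again.  A rough sketch of the pairing
(the helper bodies are paraphrased from memory, a simplified approximation
rather than the verbatim mainline implementation):

	/* Take a ref and mark the folio as having in-flight cache I/O. */
	static inline void folio_start_private_2(struct folio *folio)
	{
		folio_get(folio);
		folio_set_private_2(folio);
	}

	/* Clear the mark, wake any waiters and drop the ref taken above. */
	void folio_end_private_2(struct folio *folio)
	{
		clear_bit_unlock(PG_private_2, folio_flags(folio, 0));
		folio_wake_bit(folio, PG_private_2);
		folio_put(folio);
	}

With the folio_start_private_2() calls gone, the leftover
folio_end_private_2() loop in __fscache_clear_page_bits() drops references
that were never taken, which is how the folio reference count gets
unbalanced.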

Comments

Jeff Layton July 29, 2024, 12:56 p.m. UTC | #1
On Mon, 2024-07-29 at 11:15 +0200, Max Kellermann wrote:
> This fixes a crash bug caused by commit ae678317b95e ("netfs: Remove
> deprecated use of PG_private_2 as a second writeback flag") by removing
> a leftover folio_end_private_2() call that remained after all calls to
> folio_start_private_2() had been removed by that commit.
> 
> Calling folio_end_private_2() without a preceding
> folio_start_private_2() unbalances the folio reference count, causing
> problems such as RCU stalls and general protection faults.
> 
> Signed-off-by: Max Kellermann <max.kellermann@ionos.com>
> Fixes: ae678317b95e ("netfs: Remove deprecated use of PG_private_2 as a second writeback flag")
> Link: https://lore.kernel.org/ceph-devel/CAKPOu+_DA8XiMAA2ApMj7Pyshve_YWknw8Hdt1=zCy9Y87R1qw@mail.gmail.com/
> Signed-off-by: Max Kellermann <max.kellermann@ionos.com>
> ---
>  fs/ceph/addr.c          |  2 +-
>  fs/netfs/fscache_io.c   | 29 +----------------------------
>  include/linux/fscache.h | 30 ++++--------------------------
>  3 files changed, 6 insertions(+), 55 deletions(-)
> 
> diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
> index 8c16bc5250ef..485cbd1730d1 100644
> --- a/fs/ceph/addr.c
> +++ b/fs/ceph/addr.c
> @@ -512,7 +512,7 @@ static void ceph_fscache_write_to_cache(struct inode *inode, u64 off, u64 len, b
>  	struct fscache_cookie *cookie = ceph_fscache_cookie(ci);
>  
>  	fscache_write_to_cache(cookie, inode->i_mapping, off, len, i_size_read(inode),
> -			       ceph_fscache_write_terminated, inode, true, caching);
> +			       ceph_fscache_write_terminated, inode, caching);
>  }
>  #else
>  static inline void ceph_fscache_write_to_cache(struct inode *inode, u64 off, u64 len, bool caching)
> diff --git a/fs/netfs/fscache_io.c b/fs/netfs/fscache_io.c
> index 38637e5c9b57..0d8f3f646598 100644
> --- a/fs/netfs/fscache_io.c
> +++ b/fs/netfs/fscache_io.c
> @@ -166,30 +166,10 @@ struct fscache_write_request {
>  	loff_t			start;
>  	size_t			len;
>  	bool			set_bits;
> -	bool			using_pgpriv2;
>  	netfs_io_terminated_t	term_func;
>  	void			*term_func_priv;
>  };
>  
> -void __fscache_clear_page_bits(struct address_space *mapping,
> -			       loff_t start, size_t len)
> -{
> -	pgoff_t first = start / PAGE_SIZE;
> -	pgoff_t last = (start + len - 1) / PAGE_SIZE;
> -	struct page *page;
> -
> -	if (len) {
> -		XA_STATE(xas, &mapping->i_pages, first);
> -
> -		rcu_read_lock();
> -		xas_for_each(&xas, page, last) {
> -			folio_end_private_2(page_folio(page));
> -		}
> -		rcu_read_unlock();
> -	}
> -}
> -EXPORT_SYMBOL(__fscache_clear_page_bits);
> -
>  /*
>   * Deal with the completion of writing the data to the cache.
>   */
> @@ -198,10 +178,6 @@ static void fscache_wreq_done(void *priv, ssize_t transferred_or_error,
>  {
>  	struct fscache_write_request *wreq = priv;
>  
> -	if (wreq->using_pgpriv2)
> -		fscache_clear_page_bits(wreq->mapping, wreq->start, wreq->len,
> -					wreq->set_bits);
> -
>  	if (wreq->term_func)
>  		wreq->term_func(wreq->term_func_priv, transferred_or_error,
>  				was_async);
> @@ -214,7 +190,7 @@ void __fscache_write_to_cache(struct fscache_cookie *cookie,
>  			      loff_t start, size_t len, loff_t i_size,
>  			      netfs_io_terminated_t term_func,
>  			      void *term_func_priv,
> -			      bool using_pgpriv2, bool cond)
> +			      bool cond)
>  {
>  	struct fscache_write_request *wreq;
>  	struct netfs_cache_resources *cres;
> @@ -232,7 +208,6 @@ void __fscache_write_to_cache(struct fscache_cookie *cookie,
>  	wreq->mapping		= mapping;
>  	wreq->start		= start;
>  	wreq->len		= len;
> -	wreq->using_pgpriv2	= using_pgpriv2;
>  	wreq->set_bits		= cond;
>  	wreq->term_func		= term_func;
>  	wreq->term_func_priv	= term_func_priv;
> @@ -260,8 +235,6 @@ void __fscache_write_to_cache(struct fscache_cookie *cookie,
>  abandon_free:
>  	kfree(wreq);
>  abandon:
> -	if (using_pgpriv2)
> -		fscache_clear_page_bits(mapping, start, len, cond);
>  	if (term_func)
>  		term_func(term_func_priv, ret, false);
>  }
> diff --git a/include/linux/fscache.h b/include/linux/fscache.h
> index 9de27643607f..f8c52bddaa15 100644
> --- a/include/linux/fscache.h
> +++ b/include/linux/fscache.h
> @@ -177,8 +177,7 @@ void __fscache_write_to_cache(struct fscache_cookie *cookie,
>  			      loff_t start, size_t len, loff_t i_size,
>  			      netfs_io_terminated_t term_func,
>  			      void *term_func_priv,
> -			      bool using_pgpriv2, bool cond);
> -extern void __fscache_clear_page_bits(struct address_space *, loff_t, size_t);
> +			      bool cond);
>  
>  /**
>   * fscache_acquire_volume - Register a volume as desiring caching services
> @@ -573,24 +572,6 @@ int fscache_write(struct netfs_cache_resources *cres,
>  	return ops->write(cres, start_pos, iter, term_func, term_func_priv);
>  }
>  
> -/**
> - * fscache_clear_page_bits - Clear the PG_fscache bits from a set of pages
> - * @mapping: The netfs inode to use as the source
> - * @start: The start position in @mapping
> - * @len: The amount of data to unlock
> - * @caching: If PG_fscache has been set
> - *
> - * Clear the PG_fscache flag from a sequence of pages and wake up anyone who's
> - * waiting.
> - */
> -static inline void fscache_clear_page_bits(struct address_space *mapping,
> -					   loff_t start, size_t len,
> -					   bool caching)
> -{
> -	if (caching)
> -		__fscache_clear_page_bits(mapping, start, len);
> -}
> -
>  /**
>   * fscache_write_to_cache - Save a write to the cache and clear PG_fscache
>   * @cookie: The cookie representing the cache object
> @@ -600,7 +581,6 @@ static inline void fscache_clear_page_bits(struct address_space *mapping,
>   * @i_size: The new size of the inode
>   * @term_func: The function to call upon completion
>   * @term_func_priv: The private data for @term_func
> - * @using_pgpriv2: If we're using PG_private_2 to mark in-progress write
>   * @caching: If we actually want to do the caching
>   *
>   * Helper function for a netfs to write dirty data from an inode into the cache
> @@ -612,21 +592,19 @@ static inline void fscache_clear_page_bits(struct address_space *mapping,
>   * marked with PG_fscache.
>   *
>   * If given, @term_func will be called upon completion and supplied with
> - * @term_func_priv.  Note that if @using_pgpriv2 is set, the PG_private_2 flags
> - * will have been cleared by this point, so the netfs must retain its own pin
> - * on the mapping.
> + * @term_func_priv.
>   */
>  static inline void fscache_write_to_cache(struct fscache_cookie *cookie,
>  					  struct address_space *mapping,
>  					  loff_t start, size_t len, loff_t i_size,
>  					  netfs_io_terminated_t term_func,
>  					  void *term_func_priv,
> -					  bool using_pgpriv2, bool caching)
> +					  bool caching)
>  {
>  	if (caching)
>  		__fscache_write_to_cache(cookie, mapping, start, len, i_size,
>  					 term_func, term_func_priv,
> -					 using_pgpriv2, caching);
> +					 caching);
>  	else if (term_func)
>  		term_func(term_func_priv, -ENOBUFS, false);
>  


(cc'ing the cephfs maintainers too)

Nice work! I'd prefer this patch over the first one. It looks like the
Fixes: commit went into v6.10. Did it go into earlier kernels too?

If so, what might be best is to take both of your patches. Have the
simple one first that just flips the flag, and mark that one for
stable. Then we can add the second patch on top to remove all of this
stuff for mainline.

Either way, you can add this to both patches:

Reviewed-by: Jeff Layton <jlayton@kernel.org>
Max Kellermann July 29, 2024, 1:04 p.m. UTC | #2
On Mon, Jul 29, 2024 at 2:56 PM Jeff Layton <jlayton@kernel.org> wrote:
> Nice work! I'd prefer this patch over the first one. It looks like the
> Fixes: commit went into v6.10. Did it go into earlier kernels too?

No, it's 6.10 only.
Max Kellermann July 29, 2024, 3:35 p.m. UTC | #3
On Mon, Jul 29, 2024 at 2:56 PM Jeff Layton <jlayton@kernel.org> wrote:
> Either way, you can add this to both patches:
>
> Reviewed-by: Jeff Layton <jlayton@kernel.org>

Stop the merge :-)

I just found that my patch introduces another lockup; copy_file_range
locks up this way:

 [<0>] folio_wait_private_2+0xd9/0x140
 [<0>] ceph_write_begin+0x56/0x90
 [<0>] generic_perform_write+0xc0/0x210
 [<0>] ceph_write_iter+0x4e2/0x650
 [<0>] iter_file_splice_write+0x30d/0x550
 [<0>] splice_file_range_actor+0x2c/0x40
 [<0>] splice_direct_to_actor+0xee/0x270
 [<0>] splice_file_range+0x80/0xc0
 [<0>] ceph_copy_file_range+0xbb/0x5b0
 [<0>] vfs_copy_file_range+0x33e/0x5d0
 [<0>] __x64_sys_copy_file_range+0xf7/0x200
 [<0>] do_syscall_64+0x64/0x100
 [<0>] entry_SYSCALL_64_after_hwframe+0x76/0x7e

Turns out that there are still private_2 users left in both fs/ceph
and fs/netfs. My patches fix one problem, but cause another problem.
Too bad!

This leaves me confused again: how shall I fix this? Can all
folio_wait_private_2() calls simply be removed?
This looks like some refactoring gone wrong, and some parts don't make
sense (like both netfs and ceph claiming ownership of the folio_private
pointer). I could try to fix the mess, but I need to know how this is
meant to work. David, can you enlighten me?

Max
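
For reference, the wait in the trace above is simple: folio_wait_private_2()
just blocks until PG_private_2 is cleared, roughly like this (a simplified
sketch of the helper; details may differ from the current mm/filemap.c):

	/* Sleep until PG_private_2 is no longer set on the folio. */
	void folio_wait_private_2(struct folio *folio)
	{
		while (folio_test_private_2(folio))
			folio_wait_bit(folio, PG_private_2);
	}

That would explain the hang: if something still sets the bit but the code
that used to clear it is gone, ceph_write_begin() never gets past this wait.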
Jeff Layton July 29, 2024, 3:51 p.m. UTC | #4
On Mon, 2024-07-29 at 17:35 +0200, Max Kellermann wrote:
> On Mon, Jul 29, 2024 at 2:56 PM Jeff Layton <jlayton@kernel.org>
> wrote:
> > Either way, you can add this to both patches:
> > 
> > Reviewed-by: Jeff Layton <jlayton@kernel.org>
> 
> Stop the merge :-)
> 
> I just found that my patch introduces another lockup; copy_file_range
> locks up this way:
> 
>  [<0>] folio_wait_private_2+0xd9/0x140
>  [<0>] ceph_write_begin+0x56/0x90
>  [<0>] generic_perform_write+0xc0/0x210
>  [<0>] ceph_write_iter+0x4e2/0x650
>  [<0>] iter_file_splice_write+0x30d/0x550
>  [<0>] splice_file_range_actor+0x2c/0x40
>  [<0>] splice_direct_to_actor+0xee/0x270
>  [<0>] splice_file_range+0x80/0xc0
>  [<0>] ceph_copy_file_range+0xbb/0x5b0
>  [<0>] vfs_copy_file_range+0x33e/0x5d0
>  [<0>] __x64_sys_copy_file_range+0xf7/0x200
>  [<0>] do_syscall_64+0x64/0x100
>  [<0>] entry_SYSCALL_64_after_hwframe+0x76/0x7e
> 
> Turns out that there are still private_2 users left in both fs/ceph
> and fs/netfs. My patches fix one problem, but cause another problem.
> Too bad!
> 
> This leaves me confused again: how shall I fix this? Can all
> folio_wait_private_2() calls simply be removed?
> This looks like some refactoring gone wrong, and some parts don't make
> sense (like both netfs and ceph claiming ownership of the folio_private
> pointer). I could try to fix the mess, but I need to know how this is
> meant to work. David, can you enlighten me?
> 
> Max

I suspect the folio_wait_private_2 call in ceph_write_begin should have
also been removed in ae678317b95e, and it just got missed somehow in the
original patch. All of the other callsites that did anything with
private_2 were removed in that patch.

David, can you confirm that?
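
For reference, the suspect call site looks roughly like this in the 6.10
sources (a simplified sketch; the netfs_write_begin() plumbing shown here
is from memory and exact details may differ):

	static int ceph_write_begin(struct file *file, struct address_space *mapping,
				    loff_t pos, unsigned len,
				    struct page **pagep, void **fsdata)
	{
		struct inode *inode = file_inode(file);
		struct ceph_inode_info *ci = ceph_inode(inode);
		struct folio *folio = NULL;
		int r;

		r = netfs_write_begin(&ci->netfs, file, inode->i_mapping, pos, len,
				      &folio, NULL);
		if (r < 0)
			return r;

		/* The wait the trace above is stuck in. */
		folio_wait_private_2(folio);

		*pagep = &folio->page;
		return 0;
	}

If that wait is indeed a leftover, dropping it (together with the remaining
private_2 setters) would remove the last thing that can block on the flag.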

Patch

diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 8c16bc5250ef..485cbd1730d1 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -512,7 +512,7 @@  static void ceph_fscache_write_to_cache(struct inode *inode, u64 off, u64 len, b
 	struct fscache_cookie *cookie = ceph_fscache_cookie(ci);
 
 	fscache_write_to_cache(cookie, inode->i_mapping, off, len, i_size_read(inode),
-			       ceph_fscache_write_terminated, inode, true, caching);
+			       ceph_fscache_write_terminated, inode, caching);
 }
 #else
 static inline void ceph_fscache_write_to_cache(struct inode *inode, u64 off, u64 len, bool caching)
diff --git a/fs/netfs/fscache_io.c b/fs/netfs/fscache_io.c
index 38637e5c9b57..0d8f3f646598 100644
--- a/fs/netfs/fscache_io.c
+++ b/fs/netfs/fscache_io.c
@@ -166,30 +166,10 @@  struct fscache_write_request {
 	loff_t			start;
 	size_t			len;
 	bool			set_bits;
-	bool			using_pgpriv2;
 	netfs_io_terminated_t	term_func;
 	void			*term_func_priv;
 };
 
-void __fscache_clear_page_bits(struct address_space *mapping,
-			       loff_t start, size_t len)
-{
-	pgoff_t first = start / PAGE_SIZE;
-	pgoff_t last = (start + len - 1) / PAGE_SIZE;
-	struct page *page;
-
-	if (len) {
-		XA_STATE(xas, &mapping->i_pages, first);
-
-		rcu_read_lock();
-		xas_for_each(&xas, page, last) {
-			folio_end_private_2(page_folio(page));
-		}
-		rcu_read_unlock();
-	}
-}
-EXPORT_SYMBOL(__fscache_clear_page_bits);
-
 /*
  * Deal with the completion of writing the data to the cache.
  */
@@ -198,10 +178,6 @@  static void fscache_wreq_done(void *priv, ssize_t transferred_or_error,
 {
 	struct fscache_write_request *wreq = priv;
 
-	if (wreq->using_pgpriv2)
-		fscache_clear_page_bits(wreq->mapping, wreq->start, wreq->len,
-					wreq->set_bits);
-
 	if (wreq->term_func)
 		wreq->term_func(wreq->term_func_priv, transferred_or_error,
 				was_async);
@@ -214,7 +190,7 @@  void __fscache_write_to_cache(struct fscache_cookie *cookie,
 			      loff_t start, size_t len, loff_t i_size,
 			      netfs_io_terminated_t term_func,
 			      void *term_func_priv,
-			      bool using_pgpriv2, bool cond)
+			      bool cond)
 {
 	struct fscache_write_request *wreq;
 	struct netfs_cache_resources *cres;
@@ -232,7 +208,6 @@  void __fscache_write_to_cache(struct fscache_cookie *cookie,
 	wreq->mapping		= mapping;
 	wreq->start		= start;
 	wreq->len		= len;
-	wreq->using_pgpriv2	= using_pgpriv2;
 	wreq->set_bits		= cond;
 	wreq->term_func		= term_func;
 	wreq->term_func_priv	= term_func_priv;
@@ -260,8 +235,6 @@  void __fscache_write_to_cache(struct fscache_cookie *cookie,
 abandon_free:
 	kfree(wreq);
 abandon:
-	if (using_pgpriv2)
-		fscache_clear_page_bits(mapping, start, len, cond);
 	if (term_func)
 		term_func(term_func_priv, ret, false);
 }
diff --git a/include/linux/fscache.h b/include/linux/fscache.h
index 9de27643607f..f8c52bddaa15 100644
--- a/include/linux/fscache.h
+++ b/include/linux/fscache.h
@@ -177,8 +177,7 @@  void __fscache_write_to_cache(struct fscache_cookie *cookie,
 			      loff_t start, size_t len, loff_t i_size,
 			      netfs_io_terminated_t term_func,
 			      void *term_func_priv,
-			      bool using_pgpriv2, bool cond);
-extern void __fscache_clear_page_bits(struct address_space *, loff_t, size_t);
+			      bool cond);
 
 /**
  * fscache_acquire_volume - Register a volume as desiring caching services
@@ -573,24 +572,6 @@  int fscache_write(struct netfs_cache_resources *cres,
 	return ops->write(cres, start_pos, iter, term_func, term_func_priv);
 }
 
-/**
- * fscache_clear_page_bits - Clear the PG_fscache bits from a set of pages
- * @mapping: The netfs inode to use as the source
- * @start: The start position in @mapping
- * @len: The amount of data to unlock
- * @caching: If PG_fscache has been set
- *
- * Clear the PG_fscache flag from a sequence of pages and wake up anyone who's
- * waiting.
- */
-static inline void fscache_clear_page_bits(struct address_space *mapping,
-					   loff_t start, size_t len,
-					   bool caching)
-{
-	if (caching)
-		__fscache_clear_page_bits(mapping, start, len);
-}
-
 /**
  * fscache_write_to_cache - Save a write to the cache and clear PG_fscache
  * @cookie: The cookie representing the cache object
@@ -600,7 +581,6 @@  static inline void fscache_clear_page_bits(struct address_space *mapping,
  * @i_size: The new size of the inode
  * @term_func: The function to call upon completion
  * @term_func_priv: The private data for @term_func
- * @using_pgpriv2: If we're using PG_private_2 to mark in-progress write
  * @caching: If we actually want to do the caching
  *
  * Helper function for a netfs to write dirty data from an inode into the cache
@@ -612,21 +592,19 @@  static inline void fscache_clear_page_bits(struct address_space *mapping,
  * marked with PG_fscache.
  *
  * If given, @term_func will be called upon completion and supplied with
- * @term_func_priv.  Note that if @using_pgpriv2 is set, the PG_private_2 flags
- * will have been cleared by this point, so the netfs must retain its own pin
- * on the mapping.
+ * @term_func_priv.
  */
 static inline void fscache_write_to_cache(struct fscache_cookie *cookie,
 					  struct address_space *mapping,
 					  loff_t start, size_t len, loff_t i_size,
 					  netfs_io_terminated_t term_func,
 					  void *term_func_priv,
-					  bool using_pgpriv2, bool caching)
+					  bool caching)
 {
 	if (caching)
 		__fscache_write_to_cache(cookie, mapping, start, len, i_size,
 					 term_func, term_func_priv,
-					 using_pgpriv2, caching);
+					 caching);
 	else if (term_func)
 		term_func(term_func_priv, -ENOBUFS, false);