Message ID | 20240729091532.855688-1-max.kellermann@ionos.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | fs/netfs/fscache_io: remove the obsolete "using_pgpriv2" flag | expand |
On Mon, 2024-07-29 at 11:15 +0200, Max Kellermann wrote: > This fixes a crash bug caused by commit ae678317b95e ("netfs: Remove > deprecated use of PG_private_2 as a second writeback flag") by > removing a leftover folio_end_private_2() call after all calls to > folio_start_private_2() had been removed by the commit. > > By calling folio_end_private_2() without folio_start_private_2(), the > folio refcounter breaks and causes trouble like RCU stalls and general > protection faults. > > Signed-off-by: Max Kellermann <max.kellermann@ionos.com> > Fixes: ae678317b95e ("netfs: Remove deprecated use of PG_private_2 as a second writeback flag") > Link: https://lore.kernel.org/ceph-devel/CAKPOu+_DA8XiMAA2ApMj7Pyshve_YWknw8Hdt1=zCy9Y87R1qw@mail.gmail.com/ > Signed-off-by: Max Kellermann <max.kellermann@ionos.com> > --- > fs/ceph/addr.c | 2 +- > fs/netfs/fscache_io.c | 29 +---------------------------- > include/linux/fscache.h | 30 ++++-------------------------- > 3 files changed, 6 insertions(+), 55 deletions(-) > > diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c > index 8c16bc5250ef..485cbd1730d1 100644 > --- a/fs/ceph/addr.c > +++ b/fs/ceph/addr.c > @@ -512,7 +512,7 @@ static void ceph_fscache_write_to_cache(struct inode *inode, u64 off, u64 len, b > struct fscache_cookie *cookie = ceph_fscache_cookie(ci); > > fscache_write_to_cache(cookie, inode->i_mapping, off, len, i_size_read(inode), > - ceph_fscache_write_terminated, inode, true, caching); > + ceph_fscache_write_terminated, inode, caching); > } > #else > static inline void ceph_fscache_write_to_cache(struct inode *inode, u64 off, u64 len, bool caching) > diff --git a/fs/netfs/fscache_io.c b/fs/netfs/fscache_io.c > index 38637e5c9b57..0d8f3f646598 100644 > --- a/fs/netfs/fscache_io.c > +++ b/fs/netfs/fscache_io.c > @@ -166,30 +166,10 @@ struct fscache_write_request { > loff_t start; > size_t len; > bool set_bits; > - bool using_pgpriv2; > netfs_io_terminated_t term_func; > void *term_func_priv; > }; > > -void 
__fscache_clear_page_bits(struct address_space *mapping, > - loff_t start, size_t len) > -{ > - pgoff_t first = start / PAGE_SIZE; > - pgoff_t last = (start + len - 1) / PAGE_SIZE; > - struct page *page; > - > - if (len) { > - XA_STATE(xas, &mapping->i_pages, first); > - > - rcu_read_lock(); > - xas_for_each(&xas, page, last) { > - folio_end_private_2(page_folio(page)); > - } > - rcu_read_unlock(); > - } > -} > -EXPORT_SYMBOL(__fscache_clear_page_bits); > - > /* > * Deal with the completion of writing the data to the cache. > */ > @@ -198,10 +178,6 @@ static void fscache_wreq_done(void *priv, ssize_t transferred_or_error, > { > struct fscache_write_request *wreq = priv; > > - if (wreq->using_pgpriv2) > - fscache_clear_page_bits(wreq->mapping, wreq->start, wreq->len, > - wreq->set_bits); > - > if (wreq->term_func) > wreq->term_func(wreq->term_func_priv, transferred_or_error, > was_async); > @@ -214,7 +190,7 @@ void __fscache_write_to_cache(struct fscache_cookie *cookie, > loff_t start, size_t len, loff_t i_size, > netfs_io_terminated_t term_func, > void *term_func_priv, > - bool using_pgpriv2, bool cond) > + bool cond) > { > struct fscache_write_request *wreq; > struct netfs_cache_resources *cres; > @@ -232,7 +208,6 @@ void __fscache_write_to_cache(struct fscache_cookie *cookie, > wreq->mapping = mapping; > wreq->start = start; > wreq->len = len; > - wreq->using_pgpriv2 = using_pgpriv2; > wreq->set_bits = cond; > wreq->term_func = term_func; > wreq->term_func_priv = term_func_priv; > @@ -260,8 +235,6 @@ void __fscache_write_to_cache(struct fscache_cookie *cookie, > abandon_free: > kfree(wreq); > abandon: > - if (using_pgpriv2) > - fscache_clear_page_bits(mapping, start, len, cond); > if (term_func) > term_func(term_func_priv, ret, false); > } > diff --git a/include/linux/fscache.h b/include/linux/fscache.h > index 9de27643607f..f8c52bddaa15 100644 > --- a/include/linux/fscache.h > +++ b/include/linux/fscache.h > @@ -177,8 +177,7 @@ void 
__fscache_write_to_cache(struct fscache_cookie *cookie, > loff_t start, size_t len, loff_t i_size, > netfs_io_terminated_t term_func, > void *term_func_priv, > - bool using_pgpriv2, bool cond); > -extern void __fscache_clear_page_bits(struct address_space *, loff_t, size_t); > + bool cond); > > /** > * fscache_acquire_volume - Register a volume as desiring caching services > @@ -573,24 +572,6 @@ int fscache_write(struct netfs_cache_resources *cres, > return ops->write(cres, start_pos, iter, term_func, term_func_priv); > } > > -/** > - * fscache_clear_page_bits - Clear the PG_fscache bits from a set of pages > - * @mapping: The netfs inode to use as the source > - * @start: The start position in @mapping > - * @len: The amount of data to unlock > - * @caching: If PG_fscache has been set > - * > - * Clear the PG_fscache flag from a sequence of pages and wake up anyone who's > - * waiting. > - */ > -static inline void fscache_clear_page_bits(struct address_space *mapping, > - loff_t start, size_t len, > - bool caching) > -{ > - if (caching) > - __fscache_clear_page_bits(mapping, start, len); > -} > - > /** > * fscache_write_to_cache - Save a write to the cache and clear PG_fscache > * @cookie: The cookie representing the cache object > @@ -600,7 +581,6 @@ static inline void fscache_clear_page_bits(struct address_space *mapping, > * @i_size: The new size of the inode > * @term_func: The function to call upon completion > * @term_func_priv: The private data for @term_func > - * @using_pgpriv2: If we're using PG_private_2 to mark in-progress write > * @caching: If we actually want to do the caching > * > * Helper function for a netfs to write dirty data from an inode into the cache > @@ -612,21 +592,19 @@ static inline void fscache_clear_page_bits(struct address_space *mapping, > * marked with PG_fscache. > * > * If given, @term_func will be called upon completion and supplied with > - * @term_func_priv. 
Note that if @using_pgpriv2 is set, the PG_private_2 flags > - * will have been cleared by this point, so the netfs must retain its own pin > - * on the mapping. > + * @term_func_priv. > */ > static inline void fscache_write_to_cache(struct fscache_cookie *cookie, > struct address_space *mapping, > loff_t start, size_t len, loff_t i_size, > netfs_io_terminated_t term_func, > void *term_func_priv, > - bool using_pgpriv2, bool caching) > + bool caching) > { > if (caching) > __fscache_write_to_cache(cookie, mapping, start, len, i_size, > term_func, term_func_priv, > - using_pgpriv2, caching); > + caching); > else if (term_func) > term_func(term_func_priv, -ENOBUFS, false); > (cc'ing the cephfs maintainers too) Nice work! I'd prefer this patch over the first one. It looks like the Fixes: commit went into v6.10. Did it go into earlier kernels too? If so, what might be best is to take both of your patches. Have the simple one first that just flips the flag, and mark that one for stable. Then we can add the second patch on top to remove all of this stuff for mainline. Either way, you can add this to both patches: Reviewed-by: Jeff Layton <jlayton@kernel.org>
On Mon, Jul 29, 2024 at 2:56 PM Jeff Layton <jlayton@kernel.org> wrote:
> Nice work! I'd prefer this patch over the first one. It looks like the
> Fixes: commit went into v6.10. Did it go into earlier kernels too?

No, it's 6.10 only.
On Mon, Jul 29, 2024 at 2:56 PM Jeff Layton <jlayton@kernel.org> wrote:
> Either way, you can add this to both patches:
>
> Reviewed-by: Jeff Layton <jlayton@kernel.org>

Stop the merge :-)

I just found that my patch introduces another lockup; copy_file_range
locks up this way:

 [<0>] folio_wait_private_2+0xd9/0x140
 [<0>] ceph_write_begin+0x56/0x90
 [<0>] generic_perform_write+0xc0/0x210
 [<0>] ceph_write_iter+0x4e2/0x650
 [<0>] iter_file_splice_write+0x30d/0x550
 [<0>] splice_file_range_actor+0x2c/0x40
 [<0>] splice_direct_to_actor+0xee/0x270
 [<0>] splice_file_range+0x80/0xc0
 [<0>] ceph_copy_file_range+0xbb/0x5b0
 [<0>] vfs_copy_file_range+0x33e/0x5d0
 [<0>] __x64_sys_copy_file_range+0xf7/0x200
 [<0>] do_syscall_64+0x64/0x100
 [<0>] entry_SYSCALL_64_after_hwframe+0x76/0x7e

Turns out that there are still private_2 users left in both fs/ceph
and fs/netfs. My patches fix one problem, but cause another problem.
Too bad!

This leaves me confused again: how shall I fix this? Can all
folio_wait_private_2() calls simply be removed?
This looks like some refactoring gone wrong, and some parts don't make
sense (like netfs and ceph claim ownership of the folio_private
pointer). I could try to fix the mess, but I need to know how this is
meant to be. David, can you enlighten me?

Max
On Mon, 2024-07-29 at 17:35 +0200, Max Kellermann wrote:
> On Mon, Jul 29, 2024 at 2:56 PM Jeff Layton <jlayton@kernel.org> wrote:
> > Either way, you can add this to both patches:
> >
> > Reviewed-by: Jeff Layton <jlayton@kernel.org>
>
> Stop the merge :-)
>
> I just found that my patch introduces another lockup; copy_file_range
> locks up this way:
>
>  [<0>] folio_wait_private_2+0xd9/0x140
>  [<0>] ceph_write_begin+0x56/0x90
>  [<0>] generic_perform_write+0xc0/0x210
>  [<0>] ceph_write_iter+0x4e2/0x650
>  [<0>] iter_file_splice_write+0x30d/0x550
>  [<0>] splice_file_range_actor+0x2c/0x40
>  [<0>] splice_direct_to_actor+0xee/0x270
>  [<0>] splice_file_range+0x80/0xc0
>  [<0>] ceph_copy_file_range+0xbb/0x5b0
>  [<0>] vfs_copy_file_range+0x33e/0x5d0
>  [<0>] __x64_sys_copy_file_range+0xf7/0x200
>  [<0>] do_syscall_64+0x64/0x100
>  [<0>] entry_SYSCALL_64_after_hwframe+0x76/0x7e
>
> Turns out that there are still private_2 users left in both fs/ceph
> and fs/netfs. My patches fix one problem, but cause another problem.
> Too bad!
>
> This leaves me confused again: how shall I fix this? Can all
> folio_wait_private_2() calls simply be removed?
> This looks like some refactoring gone wrong, and some parts don't make
> sense (like netfs and ceph claim ownership of the folio_private
> pointer). I could try to fix the mess, but I need to know how this is
> meant to be. David, can you enlighten me?
>
> Max

I suspect the folio_wait_private_2 call in ceph_write_begin should have
also been removed in ae678317b95, and it just got missed somehow in the
original patch. All of the other callsites that did anything with
private_2 were removed in that patch.

David, can you confirm that?
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 8c16bc5250ef..485cbd1730d1 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -512,7 +512,7 @@ static void ceph_fscache_write_to_cache(struct inode *inode, u64 off, u64 len, b struct fscache_cookie *cookie = ceph_fscache_cookie(ci); fscache_write_to_cache(cookie, inode->i_mapping, off, len, i_size_read(inode), - ceph_fscache_write_terminated, inode, true, caching); + ceph_fscache_write_terminated, inode, caching); } #else static inline void ceph_fscache_write_to_cache(struct inode *inode, u64 off, u64 len, bool caching) diff --git a/fs/netfs/fscache_io.c b/fs/netfs/fscache_io.c index 38637e5c9b57..0d8f3f646598 100644 --- a/fs/netfs/fscache_io.c +++ b/fs/netfs/fscache_io.c @@ -166,30 +166,10 @@ struct fscache_write_request { loff_t start; size_t len; bool set_bits; - bool using_pgpriv2; netfs_io_terminated_t term_func; void *term_func_priv; }; -void __fscache_clear_page_bits(struct address_space *mapping, - loff_t start, size_t len) -{ - pgoff_t first = start / PAGE_SIZE; - pgoff_t last = (start + len - 1) / PAGE_SIZE; - struct page *page; - - if (len) { - XA_STATE(xas, &mapping->i_pages, first); - - rcu_read_lock(); - xas_for_each(&xas, page, last) { - folio_end_private_2(page_folio(page)); - } - rcu_read_unlock(); - } -} -EXPORT_SYMBOL(__fscache_clear_page_bits); - /* * Deal with the completion of writing the data to the cache. 
*/ @@ -198,10 +178,6 @@ static void fscache_wreq_done(void *priv, ssize_t transferred_or_error, { struct fscache_write_request *wreq = priv; - if (wreq->using_pgpriv2) - fscache_clear_page_bits(wreq->mapping, wreq->start, wreq->len, - wreq->set_bits); - if (wreq->term_func) wreq->term_func(wreq->term_func_priv, transferred_or_error, was_async); @@ -214,7 +190,7 @@ void __fscache_write_to_cache(struct fscache_cookie *cookie, loff_t start, size_t len, loff_t i_size, netfs_io_terminated_t term_func, void *term_func_priv, - bool using_pgpriv2, bool cond) + bool cond) { struct fscache_write_request *wreq; struct netfs_cache_resources *cres; @@ -232,7 +208,6 @@ void __fscache_write_to_cache(struct fscache_cookie *cookie, wreq->mapping = mapping; wreq->start = start; wreq->len = len; - wreq->using_pgpriv2 = using_pgpriv2; wreq->set_bits = cond; wreq->term_func = term_func; wreq->term_func_priv = term_func_priv; @@ -260,8 +235,6 @@ void __fscache_write_to_cache(struct fscache_cookie *cookie, abandon_free: kfree(wreq); abandon: - if (using_pgpriv2) - fscache_clear_page_bits(mapping, start, len, cond); if (term_func) term_func(term_func_priv, ret, false); } diff --git a/include/linux/fscache.h b/include/linux/fscache.h index 9de27643607f..f8c52bddaa15 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -177,8 +177,7 @@ void __fscache_write_to_cache(struct fscache_cookie *cookie, loff_t start, size_t len, loff_t i_size, netfs_io_terminated_t term_func, void *term_func_priv, - bool using_pgpriv2, bool cond); -extern void __fscache_clear_page_bits(struct address_space *, loff_t, size_t); + bool cond); /** * fscache_acquire_volume - Register a volume as desiring caching services @@ -573,24 +572,6 @@ int fscache_write(struct netfs_cache_resources *cres, return ops->write(cres, start_pos, iter, term_func, term_func_priv); } -/** - * fscache_clear_page_bits - Clear the PG_fscache bits from a set of pages - * @mapping: The netfs inode to use as the source - * 
@start: The start position in @mapping - * @len: The amount of data to unlock - * @caching: If PG_fscache has been set - * - * Clear the PG_fscache flag from a sequence of pages and wake up anyone who's - * waiting. - */ -static inline void fscache_clear_page_bits(struct address_space *mapping, - loff_t start, size_t len, - bool caching) -{ - if (caching) - __fscache_clear_page_bits(mapping, start, len); -} - /** * fscache_write_to_cache - Save a write to the cache and clear PG_fscache * @cookie: The cookie representing the cache object @@ -600,7 +581,6 @@ static inline void fscache_clear_page_bits(struct address_space *mapping, * @i_size: The new size of the inode * @term_func: The function to call upon completion * @term_func_priv: The private data for @term_func - * @using_pgpriv2: If we're using PG_private_2 to mark in-progress write * @caching: If we actually want to do the caching * * Helper function for a netfs to write dirty data from an inode into the cache @@ -612,21 +592,19 @@ static inline void fscache_clear_page_bits(struct address_space *mapping, * marked with PG_fscache. * * If given, @term_func will be called upon completion and supplied with - * @term_func_priv. Note that if @using_pgpriv2 is set, the PG_private_2 flags - * will have been cleared by this point, so the netfs must retain its own pin - * on the mapping. + * @term_func_priv. */ static inline void fscache_write_to_cache(struct fscache_cookie *cookie, struct address_space *mapping, loff_t start, size_t len, loff_t i_size, netfs_io_terminated_t term_func, void *term_func_priv, - bool using_pgpriv2, bool caching) + bool caching) { if (caching) __fscache_write_to_cache(cookie, mapping, start, len, i_size, term_func, term_func_priv, - using_pgpriv2, caching); + caching); else if (term_func) term_func(term_func_priv, -ENOBUFS, false);