Message ID: 20250127150539.601009-1-hch@lst.de (mailing list archive)
State: New
Series: xfs: remove xfs_buf_cache.bc_lock
On Mon, Jan 27, 2025 at 04:05:39PM +0100, Christoph Hellwig wrote:
> xfs_buf_cache.bc_lock serializes adding buffers to and removing them from
> the hashtable. But as the rhashtable code already uses fine grained
> internal locking for inserts and removals the extra protection isn't
> actually required.
>
> It also happens to fix a lock order inversion vs b_lock added by the
> recent lookup race fix.
>
> Fixes: ee10f6fcdb96 ("xfs: fix buffer lookup vs release race")
> Reported-by: "Lai, Yi" <yi1.lai@linux.intel.com>
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Looks good,

Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
On Mon, Jan 27, 2025 at 04:05:39PM +0100, Christoph Hellwig wrote:
> xfs_buf_cache.bc_lock serializes adding buffers to and removing them from
> the hashtable. But as the rhashtable code already uses fine grained
> internal locking for inserts and removals the extra protection isn't
> actually required.
>
> It also happens to fix a lock order inversion vs b_lock added by the
> recent lookup race fix.
>
> Fixes: ee10f6fcdb96 ("xfs: fix buffer lookup vs release race")
> Reported-by: "Lai, Yi" <yi1.lai@linux.intel.com>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  fs/xfs/xfs_buf.c | 20 ++++++++------------
>  fs/xfs/xfs_buf.h |  1 -
>  2 files changed, 8 insertions(+), 13 deletions(-)
>
> diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
> index d1d4a0a22e13..1fffa2990bd9 100644
> --- a/fs/xfs/xfs_buf.c
> +++ b/fs/xfs/xfs_buf.c
> @@ -41,8 +41,7 @@ struct kmem_cache *xfs_buf_cache;
>   *
>   * xfs_buf_rele:
>   *	b_lock
> - *	  pag_buf_lock
> - *	    lru_lock
> + *	  lru_lock
>   *
>   * xfs_buftarg_drain_rele
>   *	lru_lock
> @@ -502,7 +501,6 @@ int
>  xfs_buf_cache_init(
>  	struct xfs_buf_cache	*bch)
>  {
> -	spin_lock_init(&bch->bc_lock);
>  	return rhashtable_init(&bch->bc_hash, &xfs_buf_hash_params);
>  }
>
> @@ -652,17 +650,20 @@ xfs_buf_find_insert(
>  	if (error)
>  		goto out_free_buf;
>
> -	spin_lock(&bch->bc_lock);
> +	/* The new buffer keeps the perag reference until it is freed. */
> +	new_bp->b_pag = pag;
> +
> +	rcu_read_lock();
>  	bp = rhashtable_lookup_get_insert_fast(&bch->bc_hash,
>  			&new_bp->b_rhash_head, xfs_buf_hash_params);
>  	if (IS_ERR(bp)) {
> +		rcu_read_unlock();
>  		error = PTR_ERR(bp);
> -		spin_unlock(&bch->bc_lock);
>  		goto out_free_buf;
>  	}
>  	if (bp && xfs_buf_try_hold(bp)) {
>  		/* found an existing buffer */
> -		spin_unlock(&bch->bc_lock);
> +		rcu_read_unlock();
>  		error = xfs_buf_find_lock(bp, flags);
>  		if (error)
>  			xfs_buf_rele(bp);

Ok, so now we can get racing inserts, which means this can find
the buffer that has just been inserted by another thread in this
same function. Or, indeed, an xfs_buf_lookup() call. What prevents
those racing tasks from using this buffer before the task that
inserted it can use it?

I think that the buffer lock being initialised to "held" and
b_hold being initialised to 1 make this all work correctly, but
comments that explicitly spell out why RCU inserts are safe
(both in xfs_buf_alloc() for the init values and here) would be
appreciated.

> diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
> index 7e73663c5d4a..3b4ed42e11c0 100644
> --- a/fs/xfs/xfs_buf.h
> +++ b/fs/xfs/xfs_buf.h
> @@ -80,7 +80,6 @@ typedef unsigned int xfs_buf_flags_t;
>  #define XFS_BSTATE_IN_FLIGHT	(1 << 1)	/* I/O in flight */
>
>  struct xfs_buf_cache {
> -	spinlock_t		bc_lock;
>  	struct rhashtable	bc_hash;
>  };

At this point, the struct xfs_buf_cache structure can go away,
right? (separate patch and all that...)

-Dave.
On Tue, Jan 28, 2025 at 07:19:11AM +1100, Dave Chinner wrote:
> Ok, so now we can get racing inserts, which means this can find
> the buffer that has just been inserted by another thread in this
> same function. Or, indeed, an xfs_buf_lookup() call.

Yes.

> What prevents
> those racing tasks from using this buffer before the task that
> inserted it can use it?
>
> I think that the buffer lock being initialised to "held" and
> b_hold being initialised to 1 make this all work correctly,

Exactly, the buffer is inserted with the b_sema held and b_hold
initialized to 1, aka locked and held.

> but
> comments that explicitly spell out why RCU inserts are safe
> (both in xfs_buf_alloc() for the init values and here) would be
> appreciated.

Sure.

> >  struct xfs_buf_cache {
> > -	spinlock_t		bc_lock;
> >  	struct rhashtable	bc_hash;
> >  };
>
> At this point, the struct xfs_buf_cache structure can go away,
> right? (separate patch and all that...)

Yes.  And in fact I think the per-pag hash should also go away, as with
the per-bucket locking there is no point in it.  I've had this patch in
my testing runs for a while, which I think is where we should be going:

http://git.infradead.org/?p=users/hch/xfs.git;a=commitdiff;h=890cd2cd255710ee5d3408bc60792b9cdad3adfb
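To illustrate the point made above — this is a hedged, userspace sketch, not the XFS code, and every name in it is made up — an object published into a lockless structure already "locked and held" can be found by a racing thread, but that thread takes its hold and then sleeps on the lock until the inserting thread has finished setting the object up:

```c
/*
 * Illustrative sketch only: the mutex stands in for b_sema and the
 * atomic counter for b_hold; none of this is taken from xfs_buf.c.
 */
#include <pthread.h>
#include <stdatomic.h>

struct obj {
	pthread_mutex_t lock;	/* analogous to b_sema */
	atomic_int      hold;	/* analogous to b_hold */
	/* ... payload filled in by the inserter ... */
};

/* Run by the inserting thread before the object becomes visible. */
void obj_init_for_insert(struct obj *o)
{
	pthread_mutex_init(&o->lock, NULL);
	pthread_mutex_lock(&o->lock);	/* published "locked" */
	atomic_store(&o->hold, 1);	/* published "held" */
}

/* A racing finder grabs a hold, then blocks until the inserter unlocks. */
struct obj *obj_find_lock(struct obj *o)
{
	atomic_fetch_add(&o->hold, 1);	/* roughly what a try-hold does */
	pthread_mutex_lock(&o->lock);	/* sleeps until setup is complete */
	return o;
}
```

In the patch itself the same roles are played by b_sema (initialised held) and b_hold (initialised to 1), which is exactly the property Christoph points to above.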
On Tue, Jan 28, 2025 at 06:06:14AM +0100, Christoph Hellwig wrote:
> On Tue, Jan 28, 2025 at 07:19:11AM +1100, Dave Chinner wrote:
> > Ok, so now we can get racing inserts, which means this can find
> > the buffer that has just been inserted by another thread in this
> > same function. Or, indeed, an xfs_buf_lookup() call.
>
> Yes.
>
> > What prevents
> > those racing tasks from using this buffer before the task that
> > inserted it can use it?
> >
> > I think that the buffer lock being initialised to "held" and
> > b_hold being initialised to 1 make this all work correctly,
>
> Exactly, the buffer is inserted with the b_sema held and b_hold
> initialized to 1, aka locked and held.
>
> > but
> > comments that explicitly spell out why RCU inserts are safe
> > (both in xfs_buf_alloc() for the init values and here) would be
> > appreciated.
>
> Sure.

Thanks.

> > >  struct xfs_buf_cache {
> > > -	spinlock_t		bc_lock;
> > >  	struct rhashtable	bc_hash;
> > >  };
> >
> > At this point, the struct xfs_buf_cache structure can go away,
> > right? (separate patch and all that...)
>
> Yes.  And in fact I think the per-pag hash should also go away, as with
> the per-bucket locking there is no point in it.  I've had this patch in
> my testing runs for a while, which I think is where we should be going:
>
> http://git.infradead.org/?p=users/hch/xfs.git;a=commitdiff;h=890cd2cd255710ee5d3408bc60792b9cdad3adfb

*nod*

Code seems reasonable, but it'll need some benchmarking and
scalability analysis before merging...

-Dave.
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index d1d4a0a22e13..1fffa2990bd9 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -41,8 +41,7 @@ struct kmem_cache *xfs_buf_cache;
  *
  * xfs_buf_rele:
  *	b_lock
- *	  pag_buf_lock
- *	    lru_lock
+ *	  lru_lock
  *
  * xfs_buftarg_drain_rele
  *	lru_lock
@@ -502,7 +501,6 @@ int
 xfs_buf_cache_init(
 	struct xfs_buf_cache	*bch)
 {
-	spin_lock_init(&bch->bc_lock);
 	return rhashtable_init(&bch->bc_hash, &xfs_buf_hash_params);
 }
 
@@ -652,17 +650,20 @@ xfs_buf_find_insert(
 	if (error)
 		goto out_free_buf;
 
-	spin_lock(&bch->bc_lock);
+	/* The new buffer keeps the perag reference until it is freed. */
+	new_bp->b_pag = pag;
+
+	rcu_read_lock();
 	bp = rhashtable_lookup_get_insert_fast(&bch->bc_hash,
 			&new_bp->b_rhash_head, xfs_buf_hash_params);
 	if (IS_ERR(bp)) {
+		rcu_read_unlock();
 		error = PTR_ERR(bp);
-		spin_unlock(&bch->bc_lock);
 		goto out_free_buf;
 	}
 	if (bp && xfs_buf_try_hold(bp)) {
 		/* found an existing buffer */
-		spin_unlock(&bch->bc_lock);
+		rcu_read_unlock();
 		error = xfs_buf_find_lock(bp, flags);
 		if (error)
 			xfs_buf_rele(bp);
@@ -670,10 +671,8 @@ xfs_buf_find_insert(
 		*bpp = bp;
 		goto out_free_buf;
 	}
+	rcu_read_unlock();
 
-	/* The new buffer keeps the perag reference until it is freed. */
-	new_bp->b_pag = pag;
-	spin_unlock(&bch->bc_lock);
 	*bpp = new_bp;
 	return 0;
 
@@ -1090,7 +1089,6 @@ xfs_buf_rele_cached(
 	}
 
 	/* we are asked to drop the last reference */
-	spin_lock(&bch->bc_lock);
 	__xfs_buf_ioacct_dec(bp);
 	if (!(bp->b_flags & XBF_STALE) && atomic_read(&bp->b_lru_ref)) {
 		/*
@@ -1102,7 +1100,6 @@ xfs_buf_rele_cached(
 			bp->b_state &= ~XFS_BSTATE_DISPOSE;
 		else
 			bp->b_hold--;
-		spin_unlock(&bch->bc_lock);
 	} else {
 		bp->b_hold--;
 		/*
@@ -1120,7 +1117,6 @@ xfs_buf_rele_cached(
 		ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
 		rhashtable_remove_fast(&bch->bc_hash, &bp->b_rhash_head,
 				xfs_buf_hash_params);
-		spin_unlock(&bch->bc_lock);
 		if (pag)
 			xfs_perag_put(pag);
 		freebuf = true;
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 7e73663c5d4a..3b4ed42e11c0 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -80,7 +80,6 @@ typedef unsigned int xfs_buf_flags_t;
 #define XFS_BSTATE_IN_FLIGHT	(1 << 1)	/* I/O in flight */
 
 struct xfs_buf_cache {
-	spinlock_t		bc_lock;
 	struct rhashtable	bc_hash;
 };
 
xfs_buf_cache.bc_lock serializes adding buffers to and removing them from
the hashtable. But as the rhashtable code already uses fine grained
internal locking for inserts and removals the extra protection isn't
actually required.

It also happens to fix a lock order inversion vs b_lock added by the
recent lookup race fix.

Fixes: ee10f6fcdb96 ("xfs: fix buffer lookup vs release race")
Reported-by: "Lai, Yi" <yi1.lai@linux.intel.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_buf.c | 20 ++++++++------------
 fs/xfs/xfs_buf.h |  1 -
 2 files changed, 8 insertions(+), 13 deletions(-)
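As background for why no external lock is needed around the hash operations, here is a minimal sketch of the generic rhashtable pattern the patch relies on. The struct, params, and function names below are illustrative inventions; only the rhashtable, RCU, refcount, and error-pointer calls are real kernel APIs, and the handling of an already-dying entry is simplified compared to what xfs_buf_find_insert() actually does.

```c
#include <linux/rhashtable.h>
#include <linux/rcupdate.h>
#include <linux/refcount.h>
#include <linux/err.h>

/* Hypothetical object keyed by a 64-bit value. */
struct demo_obj {
	u64			key;
	struct rhash_head	node;
	refcount_t		ref;
};

static const struct rhashtable_params demo_params = {
	.key_len	= sizeof(u64),
	.key_offset	= offsetof(struct demo_obj, key),
	.head_offset	= offsetof(struct demo_obj, node),
	.automatic_shrinking = true,
};

/* No external spinlock to set up, mirroring the xfs_buf_cache_init() change. */
int demo_table_init(struct rhashtable *ht)
{
	return rhashtable_init(ht, &demo_params);
}

/*
 * Insert @new unless an entry with the same key already exists.
 * Returns NULL if @new was inserted, the existing object with a
 * reference held, or an ERR_PTR().  Inserts and removals are
 * serialized per bucket inside the rhashtable code; the caller only
 * needs an RCU read-side critical section while it examines the
 * returned pointer and takes its reference.
 */
struct demo_obj *demo_find_or_insert(struct rhashtable *ht,
		struct demo_obj *new)
{
	struct demo_obj *old;

	rcu_read_lock();
	old = rhashtable_lookup_get_insert_fast(ht, &new->node, demo_params);
	if (!IS_ERR_OR_NULL(old) && !refcount_inc_not_zero(&old->ref))
		old = ERR_PTR(-EAGAIN);	/* entry is being torn down; retry */
	rcu_read_unlock();

	return old;
}
```

The removal side follows the same rule: rhashtable_remove_fast() takes the per-bucket lock internally, which is why the bc_lock critical sections around both the insert and the remove in the patch can simply go away.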