xfs: remove xfs_buf_cache.bc_lock

Message ID 20250127150539.601009-1-hch@lst.de
State New
Series xfs: remove xfs_buf_cache.bc_lock

Commit Message

Christoph Hellwig Jan. 27, 2025, 3:05 p.m. UTC
xfs_buf_cache.bc_lock serializes adding buffers to and removing them from
the hashtable.  But as the rhashtable code already uses fine grained
internal locking for inserts and removals the extra protection isn't
actually required.

It also happens to fix a lock order inversion vs b_lock added by the
recent lookup race fix.

Fixes: ee10f6fcdb96 ("xfs: fix buffer lookup vs release race")
Reported-by: "Lai, Yi" <yi1.lai@linux.intel.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_buf.c | 20 ++++++++------------
 fs/xfs/xfs_buf.h |  1 -
 2 files changed, 8 insertions(+), 13 deletions(-)
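
For context before the review comments: the claim above hinges on
rhashtable_lookup_get_insert_fast(), which performs the lookup and the
insert as one atomic operation under rhashtable's internal per-bucket
locks.  A minimal sketch of the pattern follows; struct item, item_params
and item_try_hold() are made-up stand-ins, while the rhashtable, refcount
and RCU calls are the stock kernel API:

#include <linux/err.h>
#include <linux/refcount.h>
#include <linux/rhashtable.h>

struct item {
	u32			key;
	refcount_t		refs;
	struct rhash_head	node;
};

static const struct rhashtable_params item_params = {
	.key_len	= sizeof(u32),
	.key_offset	= offsetof(struct item, key),
	.head_offset	= offsetof(struct item, node),
};

/* Mirrors the idea of xfs_buf_try_hold(): fails if the entry is dying. */
static bool item_try_hold(struct item *it)
{
	return refcount_inc_not_zero(&it->refs);
}

/*
 * Insert @new unless an entry with the same key already exists.
 * Returns NULL if @new was inserted, an existing (and now held) entry
 * if one was found, or an ERR_PTR().  No external lock is needed; the
 * RCU read lock only keeps a found entry alive until we take a hold.
 */
static struct item *item_find_or_insert(struct rhashtable *ht,
		struct item *new)
{
	struct item *old;

	rcu_read_lock();
	old = rhashtable_lookup_get_insert_fast(ht, &new->node,
			item_params);
	if (!IS_ERR_OR_NULL(old) && !item_try_hold(old))
		old = ERR_PTR(-EAGAIN);	/* raced with a free; caller retries */
	rcu_read_unlock();
	return old;
}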

Comments

Carlos Maiolino Jan. 27, 2025, 3:45 p.m. UTC | #1
On Mon, Jan 27, 2025 at 04:05:39PM +0100, Christoph Hellwig wrote:
> xfs_buf_cache.bc_lock serializes adding buffers to and removing them from
> the hashtable.  But as the rhashtable code already uses fine grained
> internal locking for inserts and removals the extra protection isn't
> actually required.
> 
> It also happens to fix a lock order inversion vs b_lock added by the
> recent lookup race fix.
> 
> Fixes: ee10f6fcdb96 ("xfs: fix buffer lookup vs release race")
> Reported-by: "Lai, Yi" <yi1.lai@linux.intel.com>
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Looks good,
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>

Dave Chinner Jan. 27, 2025, 8:19 p.m. UTC | #2
On Mon, Jan 27, 2025 at 04:05:39PM +0100, Christoph Hellwig wrote:
> xfs_buf_cache.bc_lock serializes adding buffers to and removing them from
> the hashtable.  But as the rhashtable code already uses fine grained
> internal locking for inserts and removals the extra protection isn't
> actually required.
> 
> It also happens to fix a lock order inversion vs b_lock added by the
> recent lookup race fix.
> 
> Fixes: ee10f6fcdb96 ("xfs: fix buffer lookup vs release race")
> Reported-by: "Lai, Yi" <yi1.lai@linux.intel.com>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  fs/xfs/xfs_buf.c | 20 ++++++++------------
>  fs/xfs/xfs_buf.h |  1 -
>  2 files changed, 8 insertions(+), 13 deletions(-)
> 
> diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
> index d1d4a0a22e13..1fffa2990bd9 100644
> --- a/fs/xfs/xfs_buf.c
> +++ b/fs/xfs/xfs_buf.c
> @@ -41,8 +41,7 @@ struct kmem_cache *xfs_buf_cache;
>   *
>   * xfs_buf_rele:
>   *	b_lock
> - *	  pag_buf_lock
> - *	    lru_lock
> + *	  lru_lock
>   *
>   * xfs_buftarg_drain_rele
>   *	lru_lock
> @@ -502,7 +501,6 @@ int
>  xfs_buf_cache_init(
>  	struct xfs_buf_cache	*bch)
>  {
> -	spin_lock_init(&bch->bc_lock);
>  	return rhashtable_init(&bch->bc_hash, &xfs_buf_hash_params);
>  }
>  
> @@ -652,17 +650,20 @@ xfs_buf_find_insert(
>  	if (error)
>  		goto out_free_buf;
>  
> -	spin_lock(&bch->bc_lock);
> +	/* The new buffer keeps the perag reference until it is freed. */
> +	new_bp->b_pag = pag;
> +
> +	rcu_read_lock();
>  	bp = rhashtable_lookup_get_insert_fast(&bch->bc_hash,
>  			&new_bp->b_rhash_head, xfs_buf_hash_params);
>  	if (IS_ERR(bp)) {
> +		rcu_read_unlock();
>  		error = PTR_ERR(bp);
> -		spin_unlock(&bch->bc_lock);
>  		goto out_free_buf;
>  	}
>  	if (bp && xfs_buf_try_hold(bp)) {
>  		/* found an existing buffer */
> -		spin_unlock(&bch->bc_lock);
> +		rcu_read_unlock();
>  		error = xfs_buf_find_lock(bp, flags);
>  		if (error)
>  			xfs_buf_rele(bp);

Ok, so now we can get racing inserts, which means this can find
the buffer that has just been inserted by another thread in this
same function. Or, indeed, an xfs_buf_lookup() call. What prevents
those racing tasks from using this buffer before the task that
inserted it can use it?

I think that the buffer lock being initialised to "held" and
b_hold being initialised to 1 make this all work correctly, but
comments that explicitly spell out why RCU inserts are safe
(both in xfs_buf_alloc() for the init values and here) would be
appreciated.

> diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
> index 7e73663c5d4a..3b4ed42e11c0 100644
> --- a/fs/xfs/xfs_buf.h
> +++ b/fs/xfs/xfs_buf.h
> @@ -80,7 +80,6 @@ typedef unsigned int xfs_buf_flags_t;
>  #define XFS_BSTATE_IN_FLIGHT	 (1 << 1)	/* I/O in flight */
>  
>  struct xfs_buf_cache {
> -	spinlock_t		bc_lock;
>  	struct rhashtable	bc_hash;
>  };

At this point, the struct xfs_buf_cache structure can go away,
right?  (separate patch and all that...)

-Dave.
Christoph Hellwig Jan. 28, 2025, 5:06 a.m. UTC | #3
On Tue, Jan 28, 2025 at 07:19:11AM +1100, Dave Chinner wrote:
> Ok, so now we can get racing inserts, which means this can find
> the buffer that has just been inserted by another thread in this
> same function. Or, indeed, an xfs_buf_lookup() call.

Yes.

> What prevents
> those racing tasks from using this buffer before the task that
> inserted it can use it?
> 
> I think that the buffer lock being initialised to "held" and
> b_hold being initialised to 1 make this all work correctly,

Exactly, the buffer is inserted with the b_sema held and b_hold
initialized to 1, aka locked and held.
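
Concretely, the relevant initialisation in _xfs_buf_alloc() looks
roughly like this (paraphrased sketch, not a verbatim quote):

	spin_lock_init(&bp->b_lock);
	bp->b_hold = 1;			/* the inserting task owns this hold */
	sema_init(&bp->b_sema, 0);	/* buffer lock: held, no waiters */

So a racing lookup can take a hold on the freshly inserted buffer, but
then has to wait in xfs_buf_find_lock() until the inserting task
unlocks it.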

> but
> comments that explicitly spell out why RCU inserts are safe
> (both in xfs_buf_alloc() for the init values and here) would be
> appreciated.

Sure.

> >  struct xfs_buf_cache {
> > -	spinlock_t		bc_lock;
> >  	struct rhashtable	bc_hash;
> >  };
> 
> At this point, the struct xfs_buf_cache structure can go away,
> right?  (separate patch and all that...)

Yes.  And in fact I think the per-pag hash should also go away, as with
the per-bucket locking there is no point in it.  I've had this patch in
my testing runs for a while, which I think is where we should be
going:

http://git.infradead.org/?p=users/hch/xfs.git;a=commitdiff;h=890cd2cd255710ee5d3408bc60792b9cdad3adfb
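
A hypothetical sketch of that direction (the field name is invented
here, not taken from the code behind the link): buffers are keyed by
disk address, which is already unique across the filesystem, so a
single fs-wide table would do:

	struct xfs_mount {
		/* ... */
		struct rhashtable	m_buf_hash;	/* replaces per-pag bc_hash */
	};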
Dave Chinner Jan. 28, 2025, 6:44 a.m. UTC | #4
On Tue, Jan 28, 2025 at 06:06:14AM +0100, Christoph Hellwig wrote:
> On Tue, Jan 28, 2025 at 07:19:11AM +1100, Dave Chinner wrote:
> > Ok, so now we can get racing inserts, which means this can find
> > the buffer that has just been inserted by another thread in this
> > same function. Or, indeed, an xfs_buf_lookup() call.
> 
> Yes.
> 
> > What prevents
> > those racing tasks from using this buffer before the task that
> > inserted it can use it?
> > 
> > I think that the buffer lock being initialised to "held" and
> > b_hold being initialised to 1 make this all work correctly,
> 
> Exactly, the buffer is inserted with the b_sema held and b_hold
> initialized to 1, aka locked and held.
> 
> > but
> > comments that explicitly spell out why RCU inserts are safe
> > (both in xfs_buf_alloc() for the init values and here) would be
> > appreciated.
> 
> Sure.

Thanks.

> > >  struct xfs_buf_cache {
> > > -	spinlock_t		bc_lock;
> > >  	struct rhashtable	bc_hash;
> > >  };
> > 
> > At this point, the struct xfs_buf_cache structure can go away,
> > right?  (separate patch and all that...)
> 
> Yes.  And in fact I think the per-pag hash should also go away, as with
> the per-bucket locking there is no point in it.  I've had this patch in
> my testing runs for a while, which I think is where we should be
> going:
> 
> http://git.infradead.org/?p=users/hch/xfs.git;a=commitdiff;h=890cd2cd255710ee5d3408bc60792b9cdad3adfb

*nod*

Code seems reasonable, but it'll need some benchmarking and
scalability analysis before merging...

-Dave.

Patch

diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index d1d4a0a22e13..1fffa2990bd9 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -41,8 +41,7 @@ struct kmem_cache *xfs_buf_cache;
  *
  * xfs_buf_rele:
  *	b_lock
- *	  pag_buf_lock
- *	    lru_lock
+ *	  lru_lock
  *
  * xfs_buftarg_drain_rele
  *	lru_lock
@@ -502,7 +501,6 @@ int
 xfs_buf_cache_init(
 	struct xfs_buf_cache	*bch)
 {
-	spin_lock_init(&bch->bc_lock);
 	return rhashtable_init(&bch->bc_hash, &xfs_buf_hash_params);
 }
 
@@ -652,17 +650,20 @@ xfs_buf_find_insert(
 	if (error)
 		goto out_free_buf;
 
-	spin_lock(&bch->bc_lock);
+	/* The new buffer keeps the perag reference until it is freed. */
+	new_bp->b_pag = pag;
+
+	rcu_read_lock();
 	bp = rhashtable_lookup_get_insert_fast(&bch->bc_hash,
 			&new_bp->b_rhash_head, xfs_buf_hash_params);
 	if (IS_ERR(bp)) {
+		rcu_read_unlock();
 		error = PTR_ERR(bp);
-		spin_unlock(&bch->bc_lock);
 		goto out_free_buf;
 	}
 	if (bp && xfs_buf_try_hold(bp)) {
 		/* found an existing buffer */
-		spin_unlock(&bch->bc_lock);
+		rcu_read_unlock();
 		error = xfs_buf_find_lock(bp, flags);
 		if (error)
 			xfs_buf_rele(bp);
@@ -670,10 +671,8 @@ xfs_buf_find_insert(
 			*bpp = bp;
 		goto out_free_buf;
 	}
+	rcu_read_unlock();
 
-	/* The new buffer keeps the perag reference until it is freed. */
-	new_bp->b_pag = pag;
-	spin_unlock(&bch->bc_lock);
 	*bpp = new_bp;
 	return 0;
 
@@ -1090,7 +1089,6 @@ xfs_buf_rele_cached(
 	}
 
 	/* we are asked to drop the last reference */
-	spin_lock(&bch->bc_lock);
 	__xfs_buf_ioacct_dec(bp);
 	if (!(bp->b_flags & XBF_STALE) && atomic_read(&bp->b_lru_ref)) {
 		/*
@@ -1102,7 +1100,6 @@ xfs_buf_rele_cached(
 			bp->b_state &= ~XFS_BSTATE_DISPOSE;
 		else
 			bp->b_hold--;
-		spin_unlock(&bch->bc_lock);
 	} else {
 		bp->b_hold--;
 		/*
@@ -1120,7 +1117,6 @@ xfs_buf_rele_cached(
 		ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
 		rhashtable_remove_fast(&bch->bc_hash, &bp->b_rhash_head,
 				xfs_buf_hash_params);
-		spin_unlock(&bch->bc_lock);
 		if (pag)
 			xfs_perag_put(pag);
 		freebuf = true;
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 7e73663c5d4a..3b4ed42e11c0 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -80,7 +80,6 @@ typedef unsigned int xfs_buf_flags_t;
 #define XFS_BSTATE_IN_FLIGHT	 (1 << 1)	/* I/O in flight */
 
 struct xfs_buf_cache {
-	spinlock_t		bc_lock;
 	struct rhashtable	bc_hash;
 };