diff mbox series

xfs: using mutex instead of semaphore for xfs_buf_lock()

Message ID 20241219171629.73327-1-alexjlzheng@tencent.com (mailing list archive)
State New
Headers show
Series xfs: using mutex instead of semaphore for xfs_buf_lock() | expand

Commit Message

Jinliang Zheng Dec. 19, 2024, 5:16 p.m. UTC
xfs_buf uses a semaphore for mutual exclusion, and its count value
is initialized to 1, which is equivalent to a mutex.

However, mutex->owner can provide more information when analyzing
vmcore, making it easier for us to identify which task currently
holds the lock.

Signed-off-by: Jinliang Zheng <alexjlzheng@tencent.com>
---
 fs/xfs/xfs_buf.c   |  9 +++++----
 fs/xfs/xfs_buf.h   |  4 ++--
 fs/xfs/xfs_trace.h | 25 +++++--------------------
 3 files changed, 12 insertions(+), 26 deletions(-)

Comments

Darrick J. Wong Dec. 19, 2024, 5:36 p.m. UTC | #1
On Fri, Dec 20, 2024 at 01:16:29AM +0800, Jinliang Zheng wrote:
> xfs_buf uses a semaphore for mutual exclusion, and its count value
> is initialized to 1, which is equivalent to a mutex.
> 
> However, mutex->owner can provide more information when analyzing
> vmcore, making it easier for us to identify which task currently
> holds the lock.

Does XFS pass buffers between tasks?  xfs_btree_split has that whole
blob of ugly code where it can pass a locked inode and transaction to a
workqueue function to avoid overrunning the kernel stack.

--D

> Signed-off-by: Jinliang Zheng <alexjlzheng@tencent.com>
> ---
>  fs/xfs/xfs_buf.c   |  9 +++++----
>  fs/xfs/xfs_buf.h   |  4 ++--
>  fs/xfs/xfs_trace.h | 25 +++++--------------------
>  3 files changed, 12 insertions(+), 26 deletions(-)
> 
> diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
> index aa4dbda7b536..7c59d7905ea1 100644
> --- a/fs/xfs/xfs_buf.c
> +++ b/fs/xfs/xfs_buf.c
> @@ -243,7 +243,8 @@ _xfs_buf_alloc(
>  	INIT_LIST_HEAD(&bp->b_lru);
>  	INIT_LIST_HEAD(&bp->b_list);
>  	INIT_LIST_HEAD(&bp->b_li_list);
> -	sema_init(&bp->b_sema, 0); /* held, no waiters */
> +	mutex_init(&bp->b_mutex);
> +	mutex_lock(&bp->b_mutex); /* held, no waiters */
>  	spin_lock_init(&bp->b_lock);
>  	bp->b_target = target;
>  	bp->b_mount = target->bt_mount;
> @@ -1168,7 +1169,7 @@ xfs_buf_trylock(
>  {
>  	int			locked;
>  
> -	locked = down_trylock(&bp->b_sema) == 0;
> +	locked = mutex_trylock(&bp->b_mutex);
>  	if (locked)
>  		trace_xfs_buf_trylock(bp, _RET_IP_);
>  	else
> @@ -1193,7 +1194,7 @@ xfs_buf_lock(
>  
>  	if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
>  		xfs_log_force(bp->b_mount, 0);
> -	down(&bp->b_sema);
> +	mutex_lock(&bp->b_mutex);
>  
>  	trace_xfs_buf_lock_done(bp, _RET_IP_);
>  }
> @@ -1204,7 +1205,7 @@ xfs_buf_unlock(
>  {
>  	ASSERT(xfs_buf_islocked(bp));
>  
> -	up(&bp->b_sema);
> +	mutex_unlock(&bp->b_mutex);
>  	trace_xfs_buf_unlock(bp, _RET_IP_);
>  }
>  
> diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
> index b1580644501f..2c48e388d451 100644
> --- a/fs/xfs/xfs_buf.h
> +++ b/fs/xfs/xfs_buf.h
> @@ -171,7 +171,7 @@ struct xfs_buf {
>  	atomic_t		b_hold;		/* reference count */
>  	atomic_t		b_lru_ref;	/* lru reclaim ref count */
>  	xfs_buf_flags_t		b_flags;	/* status flags */
> -	struct semaphore	b_sema;		/* semaphore for lockables */
> +	struct mutex		b_mutex;	/* mutex for lockables */
>  
>  	/*
>  	 * concurrent access to b_lru and b_lru_flags are protected by
> @@ -304,7 +304,7 @@ extern int xfs_buf_trylock(struct xfs_buf *);
>  extern void xfs_buf_lock(struct xfs_buf *);
>  extern void xfs_buf_unlock(struct xfs_buf *);
>  #define xfs_buf_islocked(bp) \
> -	((bp)->b_sema.count <= 0)
> +	mutex_is_locked(&(bp)->b_mutex)
>  
>  static inline void xfs_buf_relse(struct xfs_buf *bp)
>  {
> diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
> index 180ce697305a..ba6c003b82af 100644
> --- a/fs/xfs/xfs_trace.h
> +++ b/fs/xfs/xfs_trace.h
> @@ -443,7 +443,6 @@ DECLARE_EVENT_CLASS(xfs_buf_class,
>  		__field(int, nblks)
>  		__field(int, hold)
>  		__field(int, pincount)
> -		__field(unsigned, lockval)
>  		__field(unsigned, flags)
>  		__field(unsigned long, caller_ip)
>  		__field(const void *, buf_ops)
> @@ -454,19 +453,17 @@ DECLARE_EVENT_CLASS(xfs_buf_class,
>  		__entry->nblks = bp->b_length;
>  		__entry->hold = atomic_read(&bp->b_hold);
>  		__entry->pincount = atomic_read(&bp->b_pin_count);
> -		__entry->lockval = bp->b_sema.count;
>  		__entry->flags = bp->b_flags;
>  		__entry->caller_ip = caller_ip;
>  		__entry->buf_ops = bp->b_ops;
>  	),
>  	TP_printk("dev %d:%d daddr 0x%llx bbcount 0x%x hold %d pincount %d "
> -		  "lock %d flags %s bufops %pS caller %pS",
> +		  "flags %s bufops %pS caller %pS",
>  		  MAJOR(__entry->dev), MINOR(__entry->dev),
>  		  (unsigned long long)__entry->bno,
>  		  __entry->nblks,
>  		  __entry->hold,
>  		  __entry->pincount,
> -		  __entry->lockval,
>  		  __print_flags(__entry->flags, "|", XFS_BUF_FLAGS),
>  		  __entry->buf_ops,
>  		  (void *)__entry->caller_ip)
> @@ -514,7 +511,6 @@ DECLARE_EVENT_CLASS(xfs_buf_flags_class,
>  		__field(unsigned int, length)
>  		__field(int, hold)
>  		__field(int, pincount)
> -		__field(unsigned, lockval)
>  		__field(unsigned, flags)
>  		__field(unsigned long, caller_ip)
>  	),
> @@ -525,17 +521,15 @@ DECLARE_EVENT_CLASS(xfs_buf_flags_class,
>  		__entry->flags = flags;
>  		__entry->hold = atomic_read(&bp->b_hold);
>  		__entry->pincount = atomic_read(&bp->b_pin_count);
> -		__entry->lockval = bp->b_sema.count;
>  		__entry->caller_ip = caller_ip;
>  	),
>  	TP_printk("dev %d:%d daddr 0x%llx bbcount 0x%x hold %d pincount %d "
> -		  "lock %d flags %s caller %pS",
> +		  "flags %s caller %pS",
>  		  MAJOR(__entry->dev), MINOR(__entry->dev),
>  		  (unsigned long long)__entry->bno,
>  		  __entry->length,
>  		  __entry->hold,
>  		  __entry->pincount,
> -		  __entry->lockval,
>  		  __print_flags(__entry->flags, "|", XFS_BUF_FLAGS),
>  		  (void *)__entry->caller_ip)
>  )
> @@ -558,7 +552,6 @@ TRACE_EVENT(xfs_buf_ioerror,
>  		__field(unsigned, flags)
>  		__field(int, hold)
>  		__field(int, pincount)
> -		__field(unsigned, lockval)
>  		__field(int, error)
>  		__field(xfs_failaddr_t, caller_ip)
>  	),
> @@ -568,19 +561,17 @@ TRACE_EVENT(xfs_buf_ioerror,
>  		__entry->length = bp->b_length;
>  		__entry->hold = atomic_read(&bp->b_hold);
>  		__entry->pincount = atomic_read(&bp->b_pin_count);
> -		__entry->lockval = bp->b_sema.count;
>  		__entry->error = error;
>  		__entry->flags = bp->b_flags;
>  		__entry->caller_ip = caller_ip;
>  	),
>  	TP_printk("dev %d:%d daddr 0x%llx bbcount 0x%x hold %d pincount %d "
> -		  "lock %d error %d flags %s caller %pS",
> +		  "error %d flags %s caller %pS",
>  		  MAJOR(__entry->dev), MINOR(__entry->dev),
>  		  (unsigned long long)__entry->bno,
>  		  __entry->length,
>  		  __entry->hold,
>  		  __entry->pincount,
> -		  __entry->lockval,
>  		  __entry->error,
>  		  __print_flags(__entry->flags, "|", XFS_BUF_FLAGS),
>  		  (void *)__entry->caller_ip)
> @@ -595,7 +586,6 @@ DECLARE_EVENT_CLASS(xfs_buf_item_class,
>  		__field(unsigned int, buf_len)
>  		__field(int, buf_hold)
>  		__field(int, buf_pincount)
> -		__field(int, buf_lockval)
>  		__field(unsigned, buf_flags)
>  		__field(unsigned, bli_recur)
>  		__field(int, bli_refcount)
> @@ -612,18 +602,16 @@ DECLARE_EVENT_CLASS(xfs_buf_item_class,
>  		__entry->buf_flags = bip->bli_buf->b_flags;
>  		__entry->buf_hold = atomic_read(&bip->bli_buf->b_hold);
>  		__entry->buf_pincount = atomic_read(&bip->bli_buf->b_pin_count);
> -		__entry->buf_lockval = bip->bli_buf->b_sema.count;
>  		__entry->li_flags = bip->bli_item.li_flags;
>  	),
>  	TP_printk("dev %d:%d daddr 0x%llx bbcount 0x%x hold %d pincount %d "
> -		  "lock %d flags %s recur %d refcount %d bliflags %s "
> +		  "flags %s recur %d refcount %d bliflags %s "
>  		  "liflags %s",
>  		  MAJOR(__entry->dev), MINOR(__entry->dev),
>  		  (unsigned long long)__entry->buf_bno,
>  		  __entry->buf_len,
>  		  __entry->buf_hold,
>  		  __entry->buf_pincount,
> -		  __entry->buf_lockval,
>  		  __print_flags(__entry->buf_flags, "|", XFS_BUF_FLAGS),
>  		  __entry->bli_recur,
>  		  __entry->bli_refcount,
> @@ -4802,7 +4790,6 @@ DECLARE_EVENT_CLASS(xfbtree_buf_class,
>  		__field(int, nblks)
>  		__field(int, hold)
>  		__field(int, pincount)
> -		__field(unsigned int, lockval)
>  		__field(unsigned int, flags)
>  	),
>  	TP_fast_assign(
> @@ -4811,16 +4798,14 @@ DECLARE_EVENT_CLASS(xfbtree_buf_class,
>  		__entry->nblks = bp->b_length;
>  		__entry->hold = atomic_read(&bp->b_hold);
>  		__entry->pincount = atomic_read(&bp->b_pin_count);
> -		__entry->lockval = bp->b_sema.count;
>  		__entry->flags = bp->b_flags;
>  	),
> -	TP_printk("xfino 0x%lx daddr 0x%llx bbcount 0x%x hold %d pincount %d lock %d flags %s",
> +	TP_printk("xfino 0x%lx daddr 0x%llx bbcount 0x%x hold %d pincount %d flags %s",
>  		  __entry->xfino,
>  		  (unsigned long long)__entry->bno,
>  		  __entry->nblks,
>  		  __entry->hold,
>  		  __entry->pincount,
> -		  __entry->lockval,
>  		  __print_flags(__entry->flags, "|", XFS_BUF_FLAGS))
>  )
>  
> -- 
> 2.41.1
> 
>
Jinliang Zheng Dec. 19, 2024, 5:51 p.m. UTC | #2
On Thu, 19 Dec 2024 09:36:15 -0800, Darrick J. Wong wrote:
> On Fri, Dec 20, 2024 at 01:16:29AM +0800, Jinliang Zheng wrote:
> > xfs_buf uses a semaphore for mutual exclusion, and its count value
> > is initialized to 1, which is equivalent to a mutex.
> > 
> > However, mutex->owner can provide more information when analyzing
> > vmcore, making it easier for us to identify which task currently
> > holds the lock.
> 
> Does XFS pass buffers between tasks?  xfs_btree_split has that whole
> blob of ugly code where it can pass a locked inode and transaction to a
> workqueue function to avoid overrunning the kernel stack.

When xfs_buf_lock() causes a hung task, we need to know which task
currently holds the lock.

However, sometimes the command 'search -t <address of xfs_buf>' may
not be effective, such as when the stack frame of xfs_buf_lock()
has been popped.

Replacing the semaphore with a mutex for xfs_buf has no negative
functional impact, but in certain situations, it indeed facilitates
our debugging.

Thank you,
Jinliang Zheng

> 
> --D
> 
> > Signed-off-by: Jinliang Zheng <alexjlzheng@tencent.com>
> > ---
> >  fs/xfs/xfs_buf.c   |  9 +++++----
> >  fs/xfs/xfs_buf.h   |  4 ++--
> >  fs/xfs/xfs_trace.h | 25 +++++--------------------
> >  3 files changed, 12 insertions(+), 26 deletions(-)
> > 
> > diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
> > index aa4dbda7b536..7c59d7905ea1 100644
> > --- a/fs/xfs/xfs_buf.c
> > +++ b/fs/xfs/xfs_buf.c
> > @@ -243,7 +243,8 @@ _xfs_buf_alloc(
> >  	INIT_LIST_HEAD(&bp->b_lru);
> >  	INIT_LIST_HEAD(&bp->b_list);
> >  	INIT_LIST_HEAD(&bp->b_li_list);
> > -	sema_init(&bp->b_sema, 0); /* held, no waiters */
> > +	mutex_init(&bp->b_mutex);
> > +	mutex_lock(&bp->b_mutex); /* held, no waiters */
> >  	spin_lock_init(&bp->b_lock);
> >  	bp->b_target = target;
> >  	bp->b_mount = target->bt_mount;
> > @@ -1168,7 +1169,7 @@ xfs_buf_trylock(
> >  {
> >  	int			locked;
> >  
> > -	locked = down_trylock(&bp->b_sema) == 0;
> > +	locked = mutex_trylock(&bp->b_mutex);
> >  	if (locked)
> >  		trace_xfs_buf_trylock(bp, _RET_IP_);
> >  	else
> > @@ -1193,7 +1194,7 @@ xfs_buf_lock(
> >  
> >  	if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
> >  		xfs_log_force(bp->b_mount, 0);
> > -	down(&bp->b_sema);
> > +	mutex_lock(&bp->b_mutex);
> >  
> >  	trace_xfs_buf_lock_done(bp, _RET_IP_);
> >  }
> > @@ -1204,7 +1205,7 @@ xfs_buf_unlock(
> >  {
> >  	ASSERT(xfs_buf_islocked(bp));
> >  
> > -	up(&bp->b_sema);
> > +	mutex_unlock(&bp->b_mutex);
> >  	trace_xfs_buf_unlock(bp, _RET_IP_);
> >  }
> >  
> > diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
> > index b1580644501f..2c48e388d451 100644
> > --- a/fs/xfs/xfs_buf.h
> > +++ b/fs/xfs/xfs_buf.h
> > @@ -171,7 +171,7 @@ struct xfs_buf {
> >  	atomic_t		b_hold;		/* reference count */
> >  	atomic_t		b_lru_ref;	/* lru reclaim ref count */
> >  	xfs_buf_flags_t		b_flags;	/* status flags */
> > -	struct semaphore	b_sema;		/* semaphore for lockables */
> > +	struct mutex		b_mutex;	/* mutex for lockables */
> >  
> >  	/*
> >  	 * concurrent access to b_lru and b_lru_flags are protected by
> > @@ -304,7 +304,7 @@ extern int xfs_buf_trylock(struct xfs_buf *);
> >  extern void xfs_buf_lock(struct xfs_buf *);
> >  extern void xfs_buf_unlock(struct xfs_buf *);
> >  #define xfs_buf_islocked(bp) \
> > -	((bp)->b_sema.count <= 0)
> > +	mutex_is_locked(&(bp)->b_mutex)
> >  
> >  static inline void xfs_buf_relse(struct xfs_buf *bp)
> >  {
> > diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
> > index 180ce697305a..ba6c003b82af 100644
> > --- a/fs/xfs/xfs_trace.h
> > +++ b/fs/xfs/xfs_trace.h
> > @@ -443,7 +443,6 @@ DECLARE_EVENT_CLASS(xfs_buf_class,
> >  		__field(int, nblks)
> >  		__field(int, hold)
> >  		__field(int, pincount)
> > -		__field(unsigned, lockval)
> >  		__field(unsigned, flags)
> >  		__field(unsigned long, caller_ip)
> >  		__field(const void *, buf_ops)
> > @@ -454,19 +453,17 @@ DECLARE_EVENT_CLASS(xfs_buf_class,
> >  		__entry->nblks = bp->b_length;
> >  		__entry->hold = atomic_read(&bp->b_hold);
> >  		__entry->pincount = atomic_read(&bp->b_pin_count);
> > -		__entry->lockval = bp->b_sema.count;
> >  		__entry->flags = bp->b_flags;
> >  		__entry->caller_ip = caller_ip;
> >  		__entry->buf_ops = bp->b_ops;
> >  	),
> >  	TP_printk("dev %d:%d daddr 0x%llx bbcount 0x%x hold %d pincount %d "
> > -		  "lock %d flags %s bufops %pS caller %pS",
> > +		  "flags %s bufops %pS caller %pS",
> >  		  MAJOR(__entry->dev), MINOR(__entry->dev),
> >  		  (unsigned long long)__entry->bno,
> >  		  __entry->nblks,
> >  		  __entry->hold,
> >  		  __entry->pincount,
> > -		  __entry->lockval,
> >  		  __print_flags(__entry->flags, "|", XFS_BUF_FLAGS),
> >  		  __entry->buf_ops,
> >  		  (void *)__entry->caller_ip)
> > @@ -514,7 +511,6 @@ DECLARE_EVENT_CLASS(xfs_buf_flags_class,
> >  		__field(unsigned int, length)
> >  		__field(int, hold)
> >  		__field(int, pincount)
> > -		__field(unsigned, lockval)
> >  		__field(unsigned, flags)
> >  		__field(unsigned long, caller_ip)
> >  	),
> > @@ -525,17 +521,15 @@ DECLARE_EVENT_CLASS(xfs_buf_flags_class,
> >  		__entry->flags = flags;
> >  		__entry->hold = atomic_read(&bp->b_hold);
> >  		__entry->pincount = atomic_read(&bp->b_pin_count);
> > -		__entry->lockval = bp->b_sema.count;
> >  		__entry->caller_ip = caller_ip;
> >  	),
> >  	TP_printk("dev %d:%d daddr 0x%llx bbcount 0x%x hold %d pincount %d "
> > -		  "lock %d flags %s caller %pS",
> > +		  "flags %s caller %pS",
> >  		  MAJOR(__entry->dev), MINOR(__entry->dev),
> >  		  (unsigned long long)__entry->bno,
> >  		  __entry->length,
> >  		  __entry->hold,
> >  		  __entry->pincount,
> > -		  __entry->lockval,
> >  		  __print_flags(__entry->flags, "|", XFS_BUF_FLAGS),
> >  		  (void *)__entry->caller_ip)
> >  )
> > @@ -558,7 +552,6 @@ TRACE_EVENT(xfs_buf_ioerror,
> >  		__field(unsigned, flags)
> >  		__field(int, hold)
> >  		__field(int, pincount)
> > -		__field(unsigned, lockval)
> >  		__field(int, error)
> >  		__field(xfs_failaddr_t, caller_ip)
> >  	),
> > @@ -568,19 +561,17 @@ TRACE_EVENT(xfs_buf_ioerror,
> >  		__entry->length = bp->b_length;
> >  		__entry->hold = atomic_read(&bp->b_hold);
> >  		__entry->pincount = atomic_read(&bp->b_pin_count);
> > -		__entry->lockval = bp->b_sema.count;
> >  		__entry->error = error;
> >  		__entry->flags = bp->b_flags;
> >  		__entry->caller_ip = caller_ip;
> >  	),
> >  	TP_printk("dev %d:%d daddr 0x%llx bbcount 0x%x hold %d pincount %d "
> > -		  "lock %d error %d flags %s caller %pS",
> > +		  "error %d flags %s caller %pS",
> >  		  MAJOR(__entry->dev), MINOR(__entry->dev),
> >  		  (unsigned long long)__entry->bno,
> >  		  __entry->length,
> >  		  __entry->hold,
> >  		  __entry->pincount,
> > -		  __entry->lockval,
> >  		  __entry->error,
> >  		  __print_flags(__entry->flags, "|", XFS_BUF_FLAGS),
> >  		  (void *)__entry->caller_ip)
> > @@ -595,7 +586,6 @@ DECLARE_EVENT_CLASS(xfs_buf_item_class,
> >  		__field(unsigned int, buf_len)
> >  		__field(int, buf_hold)
> >  		__field(int, buf_pincount)
> > -		__field(int, buf_lockval)
> >  		__field(unsigned, buf_flags)
> >  		__field(unsigned, bli_recur)
> >  		__field(int, bli_refcount)
> > @@ -612,18 +602,16 @@ DECLARE_EVENT_CLASS(xfs_buf_item_class,
> >  		__entry->buf_flags = bip->bli_buf->b_flags;
> >  		__entry->buf_hold = atomic_read(&bip->bli_buf->b_hold);
> >  		__entry->buf_pincount = atomic_read(&bip->bli_buf->b_pin_count);
> > -		__entry->buf_lockval = bip->bli_buf->b_sema.count;
> >  		__entry->li_flags = bip->bli_item.li_flags;
> >  	),
> >  	TP_printk("dev %d:%d daddr 0x%llx bbcount 0x%x hold %d pincount %d "
> > -		  "lock %d flags %s recur %d refcount %d bliflags %s "
> > +		  "flags %s recur %d refcount %d bliflags %s "
> >  		  "liflags %s",
> >  		  MAJOR(__entry->dev), MINOR(__entry->dev),
> >  		  (unsigned long long)__entry->buf_bno,
> >  		  __entry->buf_len,
> >  		  __entry->buf_hold,
> >  		  __entry->buf_pincount,
> > -		  __entry->buf_lockval,
> >  		  __print_flags(__entry->buf_flags, "|", XFS_BUF_FLAGS),
> >  		  __entry->bli_recur,
> >  		  __entry->bli_refcount,
> > @@ -4802,7 +4790,6 @@ DECLARE_EVENT_CLASS(xfbtree_buf_class,
> >  		__field(int, nblks)
> >  		__field(int, hold)
> >  		__field(int, pincount)
> > -		__field(unsigned int, lockval)
> >  		__field(unsigned int, flags)
> >  	),
> >  	TP_fast_assign(
> > @@ -4811,16 +4798,14 @@ DECLARE_EVENT_CLASS(xfbtree_buf_class,
> >  		__entry->nblks = bp->b_length;
> >  		__entry->hold = atomic_read(&bp->b_hold);
> >  		__entry->pincount = atomic_read(&bp->b_pin_count);
> > -		__entry->lockval = bp->b_sema.count;
> >  		__entry->flags = bp->b_flags;
> >  	),
> > -	TP_printk("xfino 0x%lx daddr 0x%llx bbcount 0x%x hold %d pincount %d lock %d flags %s",
> > +	TP_printk("xfino 0x%lx daddr 0x%llx bbcount 0x%x hold %d pincount %d flags %s",
> >  		  __entry->xfino,
> >  		  (unsigned long long)__entry->bno,
> >  		  __entry->nblks,
> >  		  __entry->hold,
> >  		  __entry->pincount,
> > -		  __entry->lockval,
> >  		  __print_flags(__entry->flags, "|", XFS_BUF_FLAGS))
> >  )
> >  
> > -- 
> > 2.41.1
> > 
> >
Darrick J. Wong Dec. 19, 2024, 6:35 p.m. UTC | #3
On Fri, Dec 20, 2024 at 01:51:49AM +0800, Jinliang Zheng wrote:
> On Thu, 19 Dec 2024 09:36:15 -0800, Darrick J. Wong wrote:
> > On Fri, Dec 20, 2024 at 01:16:29AM +0800, Jinliang Zheng wrote:
> > > xfs_buf uses a semaphore for mutual exclusion, and its count value
> > > is initialized to 1, which is equivalent to a mutex.
> > > 
> > > However, mutex->owner can provide more information when analyzing
> > > vmcore, making it easier for us to identify which task currently
> > > holds the lock.
> > 
> > Does XFS pass buffers between tasks?  xfs_btree_split has that whole
> > blob of ugly code where it can pass a locked inode and transaction to a
> > workqueue function to avoid overrunning the kernel stack.
> 
> When xfs_buf_lock() causes a hung task, we need to know which task
> currently holds the lock.
> 
> However, sometimes the command 'search -t <address of xfs_buf>' may
> not be effective, such as when the stack frame of xfs_buf_lock()
> has been popped.
> 
> Replacing the semaphore with a mutex for xfs_buf has no negative
> functional impact, but in certain situations, it indeed facilitates
> our debugging.

Oh?  How will the users of mutex::owner react when the xfs_buf
protected by the mutex is accessed by the workqueue thread?  What
happens when the buffers locked by the btree split worker are passed
back to the original thread when the worker completes?

Also, notice how lockdep doesn't track semaphores but it does track
mutexes?  Will that cause problems with lockdep?

--D

> Thank you,
> Jinliang Zheng
> 
> > 
> > --D
> > 
> > > Signed-off-by: Jinliang Zheng <alexjlzheng@tencent.com>
> > > ---
> > >  fs/xfs/xfs_buf.c   |  9 +++++----
> > >  fs/xfs/xfs_buf.h   |  4 ++--
> > >  fs/xfs/xfs_trace.h | 25 +++++--------------------
> > >  3 files changed, 12 insertions(+), 26 deletions(-)
> > > 
> > > diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
> > > index aa4dbda7b536..7c59d7905ea1 100644
> > > --- a/fs/xfs/xfs_buf.c
> > > +++ b/fs/xfs/xfs_buf.c
> > > @@ -243,7 +243,8 @@ _xfs_buf_alloc(
> > >  	INIT_LIST_HEAD(&bp->b_lru);
> > >  	INIT_LIST_HEAD(&bp->b_list);
> > >  	INIT_LIST_HEAD(&bp->b_li_list);
> > > -	sema_init(&bp->b_sema, 0); /* held, no waiters */
> > > +	mutex_init(&bp->b_mutex);
> > > +	mutex_lock(&bp->b_mutex); /* held, no waiters */
> > >  	spin_lock_init(&bp->b_lock);
> > >  	bp->b_target = target;
> > >  	bp->b_mount = target->bt_mount;
> > > @@ -1168,7 +1169,7 @@ xfs_buf_trylock(
> > >  {
> > >  	int			locked;
> > >  
> > > -	locked = down_trylock(&bp->b_sema) == 0;
> > > +	locked = mutex_trylock(&bp->b_mutex);
> > >  	if (locked)
> > >  		trace_xfs_buf_trylock(bp, _RET_IP_);
> > >  	else
> > > @@ -1193,7 +1194,7 @@ xfs_buf_lock(
> > >  
> > >  	if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
> > >  		xfs_log_force(bp->b_mount, 0);
> > > -	down(&bp->b_sema);
> > > +	mutex_lock(&bp->b_mutex);
> > >  
> > >  	trace_xfs_buf_lock_done(bp, _RET_IP_);
> > >  }
> > > @@ -1204,7 +1205,7 @@ xfs_buf_unlock(
> > >  {
> > >  	ASSERT(xfs_buf_islocked(bp));
> > >  
> > > -	up(&bp->b_sema);
> > > +	mutex_unlock(&bp->b_mutex);
> > >  	trace_xfs_buf_unlock(bp, _RET_IP_);
> > >  }
> > >  
> > > diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
> > > index b1580644501f..2c48e388d451 100644
> > > --- a/fs/xfs/xfs_buf.h
> > > +++ b/fs/xfs/xfs_buf.h
> > > @@ -171,7 +171,7 @@ struct xfs_buf {
> > >  	atomic_t		b_hold;		/* reference count */
> > >  	atomic_t		b_lru_ref;	/* lru reclaim ref count */
> > >  	xfs_buf_flags_t		b_flags;	/* status flags */
> > > -	struct semaphore	b_sema;		/* semaphore for lockables */
> > > +	struct mutex		b_mutex;	/* mutex for lockables */
> > >  
> > >  	/*
> > >  	 * concurrent access to b_lru and b_lru_flags are protected by
> > > @@ -304,7 +304,7 @@ extern int xfs_buf_trylock(struct xfs_buf *);
> > >  extern void xfs_buf_lock(struct xfs_buf *);
> > >  extern void xfs_buf_unlock(struct xfs_buf *);
> > >  #define xfs_buf_islocked(bp) \
> > > -	((bp)->b_sema.count <= 0)
> > > +	mutex_is_locked(&(bp)->b_mutex)
> > >  
> > >  static inline void xfs_buf_relse(struct xfs_buf *bp)
> > >  {
> > > diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
> > > index 180ce697305a..ba6c003b82af 100644
> > > --- a/fs/xfs/xfs_trace.h
> > > +++ b/fs/xfs/xfs_trace.h
> > > @@ -443,7 +443,6 @@ DECLARE_EVENT_CLASS(xfs_buf_class,
> > >  		__field(int, nblks)
> > >  		__field(int, hold)
> > >  		__field(int, pincount)
> > > -		__field(unsigned, lockval)
> > >  		__field(unsigned, flags)
> > >  		__field(unsigned long, caller_ip)
> > >  		__field(const void *, buf_ops)
> > > @@ -454,19 +453,17 @@ DECLARE_EVENT_CLASS(xfs_buf_class,
> > >  		__entry->nblks = bp->b_length;
> > >  		__entry->hold = atomic_read(&bp->b_hold);
> > >  		__entry->pincount = atomic_read(&bp->b_pin_count);
> > > -		__entry->lockval = bp->b_sema.count;
> > >  		__entry->flags = bp->b_flags;
> > >  		__entry->caller_ip = caller_ip;
> > >  		__entry->buf_ops = bp->b_ops;
> > >  	),
> > >  	TP_printk("dev %d:%d daddr 0x%llx bbcount 0x%x hold %d pincount %d "
> > > -		  "lock %d flags %s bufops %pS caller %pS",
> > > +		  "flags %s bufops %pS caller %pS",
> > >  		  MAJOR(__entry->dev), MINOR(__entry->dev),
> > >  		  (unsigned long long)__entry->bno,
> > >  		  __entry->nblks,
> > >  		  __entry->hold,
> > >  		  __entry->pincount,
> > > -		  __entry->lockval,
> > >  		  __print_flags(__entry->flags, "|", XFS_BUF_FLAGS),
> > >  		  __entry->buf_ops,
> > >  		  (void *)__entry->caller_ip)
> > > @@ -514,7 +511,6 @@ DECLARE_EVENT_CLASS(xfs_buf_flags_class,
> > >  		__field(unsigned int, length)
> > >  		__field(int, hold)
> > >  		__field(int, pincount)
> > > -		__field(unsigned, lockval)
> > >  		__field(unsigned, flags)
> > >  		__field(unsigned long, caller_ip)
> > >  	),
> > > @@ -525,17 +521,15 @@ DECLARE_EVENT_CLASS(xfs_buf_flags_class,
> > >  		__entry->flags = flags;
> > >  		__entry->hold = atomic_read(&bp->b_hold);
> > >  		__entry->pincount = atomic_read(&bp->b_pin_count);
> > > -		__entry->lockval = bp->b_sema.count;
> > >  		__entry->caller_ip = caller_ip;
> > >  	),
> > >  	TP_printk("dev %d:%d daddr 0x%llx bbcount 0x%x hold %d pincount %d "
> > > -		  "lock %d flags %s caller %pS",
> > > +		  "flags %s caller %pS",
> > >  		  MAJOR(__entry->dev), MINOR(__entry->dev),
> > >  		  (unsigned long long)__entry->bno,
> > >  		  __entry->length,
> > >  		  __entry->hold,
> > >  		  __entry->pincount,
> > > -		  __entry->lockval,
> > >  		  __print_flags(__entry->flags, "|", XFS_BUF_FLAGS),
> > >  		  (void *)__entry->caller_ip)
> > >  )
> > > @@ -558,7 +552,6 @@ TRACE_EVENT(xfs_buf_ioerror,
> > >  		__field(unsigned, flags)
> > >  		__field(int, hold)
> > >  		__field(int, pincount)
> > > -		__field(unsigned, lockval)
> > >  		__field(int, error)
> > >  		__field(xfs_failaddr_t, caller_ip)
> > >  	),
> > > @@ -568,19 +561,17 @@ TRACE_EVENT(xfs_buf_ioerror,
> > >  		__entry->length = bp->b_length;
> > >  		__entry->hold = atomic_read(&bp->b_hold);
> > >  		__entry->pincount = atomic_read(&bp->b_pin_count);
> > > -		__entry->lockval = bp->b_sema.count;
> > >  		__entry->error = error;
> > >  		__entry->flags = bp->b_flags;
> > >  		__entry->caller_ip = caller_ip;
> > >  	),
> > >  	TP_printk("dev %d:%d daddr 0x%llx bbcount 0x%x hold %d pincount %d "
> > > -		  "lock %d error %d flags %s caller %pS",
> > > +		  "error %d flags %s caller %pS",
> > >  		  MAJOR(__entry->dev), MINOR(__entry->dev),
> > >  		  (unsigned long long)__entry->bno,
> > >  		  __entry->length,
> > >  		  __entry->hold,
> > >  		  __entry->pincount,
> > > -		  __entry->lockval,
> > >  		  __entry->error,
> > >  		  __print_flags(__entry->flags, "|", XFS_BUF_FLAGS),
> > >  		  (void *)__entry->caller_ip)
> > > @@ -595,7 +586,6 @@ DECLARE_EVENT_CLASS(xfs_buf_item_class,
> > >  		__field(unsigned int, buf_len)
> > >  		__field(int, buf_hold)
> > >  		__field(int, buf_pincount)
> > > -		__field(int, buf_lockval)
> > >  		__field(unsigned, buf_flags)
> > >  		__field(unsigned, bli_recur)
> > >  		__field(int, bli_refcount)
> > > @@ -612,18 +602,16 @@ DECLARE_EVENT_CLASS(xfs_buf_item_class,
> > >  		__entry->buf_flags = bip->bli_buf->b_flags;
> > >  		__entry->buf_hold = atomic_read(&bip->bli_buf->b_hold);
> > >  		__entry->buf_pincount = atomic_read(&bip->bli_buf->b_pin_count);
> > > -		__entry->buf_lockval = bip->bli_buf->b_sema.count;
> > >  		__entry->li_flags = bip->bli_item.li_flags;
> > >  	),
> > >  	TP_printk("dev %d:%d daddr 0x%llx bbcount 0x%x hold %d pincount %d "
> > > -		  "lock %d flags %s recur %d refcount %d bliflags %s "
> > > +		  "flags %s recur %d refcount %d bliflags %s "
> > >  		  "liflags %s",
> > >  		  MAJOR(__entry->dev), MINOR(__entry->dev),
> > >  		  (unsigned long long)__entry->buf_bno,
> > >  		  __entry->buf_len,
> > >  		  __entry->buf_hold,
> > >  		  __entry->buf_pincount,
> > > -		  __entry->buf_lockval,
> > >  		  __print_flags(__entry->buf_flags, "|", XFS_BUF_FLAGS),
> > >  		  __entry->bli_recur,
> > >  		  __entry->bli_refcount,
> > > @@ -4802,7 +4790,6 @@ DECLARE_EVENT_CLASS(xfbtree_buf_class,
> > >  		__field(int, nblks)
> > >  		__field(int, hold)
> > >  		__field(int, pincount)
> > > -		__field(unsigned int, lockval)
> > >  		__field(unsigned int, flags)
> > >  	),
> > >  	TP_fast_assign(
> > > @@ -4811,16 +4798,14 @@ DECLARE_EVENT_CLASS(xfbtree_buf_class,
> > >  		__entry->nblks = bp->b_length;
> > >  		__entry->hold = atomic_read(&bp->b_hold);
> > >  		__entry->pincount = atomic_read(&bp->b_pin_count);
> > > -		__entry->lockval = bp->b_sema.count;
> > >  		__entry->flags = bp->b_flags;
> > >  	),
> > > -	TP_printk("xfino 0x%lx daddr 0x%llx bbcount 0x%x hold %d pincount %d lock %d flags %s",
> > > +	TP_printk("xfino 0x%lx daddr 0x%llx bbcount 0x%x hold %d pincount %d flags %s",
> > >  		  __entry->xfino,
> > >  		  (unsigned long long)__entry->bno,
> > >  		  __entry->nblks,
> > >  		  __entry->hold,
> > >  		  __entry->pincount,
> > > -		  __entry->lockval,
> > >  		  __print_flags(__entry->flags, "|", XFS_BUF_FLAGS))
> > >  )
> > >  
> > > -- 
> > > 2.41.1
> > > 
> > > 
>
Dave Chinner Jan. 15, 2025, 12:28 a.m. UTC | #4
On Fri, Dec 20, 2024 at 01:16:29AM +0800, Jinliang Zheng wrote:
> xfs_buf uses a semaphore for mutual exclusion, and its count value
> is initialized to 1, which is equivalent to a mutex.
> 
> However, mutex->owner can provide more information when analyzing
> vmcore, making it easier for us to identify which task currently
> holds the lock.

However, the buffer lock also protects the buffer state and contents
whilst IO is being performed and it *is not owned by any task*.

A single lock cycle for a buffer can pass through multiple tasks
before being unlocked in a different task to that which locked it:

p0			<intr>			<kworker>
xfs_buf_lock()
...
<submitted for async io>
<wait for IO completion>
		.....
			<io completion>
			queued to workqueue
		.....
						perform IO completion
						xfs_buf_unlock()


IOWs, the buffer lock here prevents any other task from accessing
and modifying the contents/state of the buffer until the IO in
flight is completed. i.e. the buffer contents are guaranteed to be
stable during write IO, and unreadable when uninitialised during
read IO....

i.e. the locking model used by xfs_buf objects is incompatible with
the single-owner-task critical section model implemented by
mutexes...

-Dave.
Jinliang Zheng Jan. 15, 2025, 12:05 p.m. UTC | #5
On Wed, 15 Jan 2025 11:28:54 +1100, Dave Chinner wrote:
> On Fri, Dec 20, 2024 at 01:16:29AM +0800, Jinliang Zheng wrote:
> > xfs_buf uses a semaphore for mutual exclusion, and its count value
> > is initialized to 1, which is equivalent to a mutex.
> > 
> > However, mutex->owner can provide more information when analyzing
> > vmcore, making it easier for us to identify which task currently
> > holds the lock.
> 
> However, the buffer lock also protects the buffer state and contents
> whilst IO is being performed and it *is not owned by any task*.
> 
> A single lock cycle for a buffer can pass through multiple tasks
> before being unlocked in a different task to that which locked it:
> 
> p0			<intr>			<kworker>
> xfs_buf_lock()
> ...
> <submitted for async io>
> <wait for IO completion>
> 		.....
> 			<io completion>
> 			queued to workqueue
> 		.....
> 						perform IO completion
> 						xfs_buf_unlock()
> 
> 
> IOWs, the buffer lock here prevents any other task from accessing
> and modifying the contents/state of the buffer until the IO in
> flight is completed. i.e. the buffer contents are guaranteed to be
> stable during write IO, and unreadable when uninitialised during
> read IO....

Yes.

> 
> i.e. the locking model used by xfs_buf objects is incompatible with
> the single-owner-task critical section model implemented by
> mutexes...
> 

Yes, from a model perspective.

This patch is proposed for two reasons:
1. The maximum count of the xfs_buf->b_sema is 1, which means that only one
   kernel code path can hold it at the same time. From this perspective,
   changing it to mutex will not have any functional impact.
2. When troubleshooting the hungtask of xfs, sometimes it is necessary to
   locate who has acquired the lock. Although, as you said, xfs_buf->b_sema
   will flow to other kernel code paths after being down(), it is also helpful
   to know which kernel code path locked it first.

Haha, that's just my thought. If you think there is really no need to know who
called the down() on xfs_buf->b_sema, please just ignore this patch.

Thank you.
Jinliang Zheng

> -Dave.
> -- 
> Dave Chinner
> david@fromorbit.com
Dave Chinner Jan. 15, 2025, 8:54 p.m. UTC | #6
On Wed, Jan 15, 2025 at 08:05:21PM +0800, Jinliang Zheng wrote:
> On Wed, 15 Jan 2025 11:28:54 +1100, Dave Chinner wrote:
> > On Fri, Dec 20, 2024 at 01:16:29AM +0800, Jinliang Zheng wrote:
> > > xfs_buf uses a semaphore for mutual exclusion, and its count value
> > > is initialized to 1, which is equivalent to a mutex.
> > > 
> > > However, mutex->owner can provide more information when analyzing
> > > vmcore, making it easier for us to identify which task currently
> > > holds the lock.
> > 
> > However, the buffer lock also protects the buffer state and contents
> > whilst IO is being performed and it *is not owned by any task*.
> > 
> > A single lock cycle for a buffer can pass through multiple tasks
> > before being unlocked in a different task to that which locked it:
> > 
> > p0			<intr>			<kworker>
> > xfs_buf_lock()
> > ...
> > <submitted for async io>
> > <wait for IO completion>
> > 		.....
> > 			<io completion>
> > 			queued to workqueue
> > 		.....
> > 						perform IO completion
> > 						xfs_buf_unlock()
> > 
> > 
> > IOWs, the buffer lock here prevents any other task from accessing
> > and modifying the contents/state of the buffer until the IO in
> > flight is completed. i.e. the buffer contents are guaranteed to be
> > stable during write IO, and unreadable when uninitialised during
> > read IO....
> 
> Yes.
> 
> > 
> > i.e. the locking model used by xfs_buf objects is incompatible with
> > the single-owner-task critical section model implemented by
> > mutexes...
> > 
> 
> Yes, from a model perspective.
> 
> This patch is proposed for two reasons:
> 1. The maximum count of the xfs_buf->b_sema is 1, which means that only one
>    kernel code path can hold it at the same time. From this perspective,
>    changing it to mutex will not have any functional impact.
> 2. When troubleshooting the hungtask of xfs, sometimes it is necessary to
>    locate who has acquired the lock. Although, as you said, xfs_buf->b_sema
>    will flow to other kernel code paths after being down(), it is also helpful
>    to know which kernel code path locked it first.
> 
> Haha, that's just my thought. If you think there is really no need to know who
> called the down() on xfs_buf->b_sema, please just ignore this patch.

We are rejecting the patch because it's fundamentally broken, not
because we don't want debugging visibility.

If you want to track what task locked a semaphore, then that should
be added to the semaphore implementation. Changing the XFS locking
implementation is not the solution to the problem you are trying to
solve.

-Dave.
diff mbox series

Patch

diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index aa4dbda7b536..7c59d7905ea1 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -243,7 +243,8 @@  _xfs_buf_alloc(
 	INIT_LIST_HEAD(&bp->b_lru);
 	INIT_LIST_HEAD(&bp->b_list);
 	INIT_LIST_HEAD(&bp->b_li_list);
-	sema_init(&bp->b_sema, 0); /* held, no waiters */
+	mutex_init(&bp->b_mutex);
+	mutex_lock(&bp->b_mutex); /* held, no waiters */
 	spin_lock_init(&bp->b_lock);
 	bp->b_target = target;
 	bp->b_mount = target->bt_mount;
@@ -1168,7 +1169,7 @@  xfs_buf_trylock(
 {
 	int			locked;
 
-	locked = down_trylock(&bp->b_sema) == 0;
+	locked = mutex_trylock(&bp->b_mutex);
 	if (locked)
 		trace_xfs_buf_trylock(bp, _RET_IP_);
 	else
@@ -1193,7 +1194,7 @@  xfs_buf_lock(
 
 	if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
 		xfs_log_force(bp->b_mount, 0);
-	down(&bp->b_sema);
+	mutex_lock(&bp->b_mutex);
 
 	trace_xfs_buf_lock_done(bp, _RET_IP_);
 }
@@ -1204,7 +1205,7 @@  xfs_buf_unlock(
 {
 	ASSERT(xfs_buf_islocked(bp));
 
-	up(&bp->b_sema);
+	mutex_unlock(&bp->b_mutex);
 	trace_xfs_buf_unlock(bp, _RET_IP_);
 }
 
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index b1580644501f..2c48e388d451 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -171,7 +171,7 @@  struct xfs_buf {
 	atomic_t		b_hold;		/* reference count */
 	atomic_t		b_lru_ref;	/* lru reclaim ref count */
 	xfs_buf_flags_t		b_flags;	/* status flags */
-	struct semaphore	b_sema;		/* semaphore for lockables */
+	struct mutex		b_mutex;	/* mutex for lockables */
 
 	/*
 	 * concurrent access to b_lru and b_lru_flags are protected by
@@ -304,7 +304,7 @@  extern int xfs_buf_trylock(struct xfs_buf *);
 extern void xfs_buf_lock(struct xfs_buf *);
 extern void xfs_buf_unlock(struct xfs_buf *);
 #define xfs_buf_islocked(bp) \
-	((bp)->b_sema.count <= 0)
+	mutex_is_locked(&(bp)->b_mutex)
 
 static inline void xfs_buf_relse(struct xfs_buf *bp)
 {
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 180ce697305a..ba6c003b82af 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -443,7 +443,6 @@  DECLARE_EVENT_CLASS(xfs_buf_class,
 		__field(int, nblks)
 		__field(int, hold)
 		__field(int, pincount)
-		__field(unsigned, lockval)
 		__field(unsigned, flags)
 		__field(unsigned long, caller_ip)
 		__field(const void *, buf_ops)
@@ -454,19 +453,17 @@  DECLARE_EVENT_CLASS(xfs_buf_class,
 		__entry->nblks = bp->b_length;
 		__entry->hold = atomic_read(&bp->b_hold);
 		__entry->pincount = atomic_read(&bp->b_pin_count);
-		__entry->lockval = bp->b_sema.count;
 		__entry->flags = bp->b_flags;
 		__entry->caller_ip = caller_ip;
 		__entry->buf_ops = bp->b_ops;
 	),
 	TP_printk("dev %d:%d daddr 0x%llx bbcount 0x%x hold %d pincount %d "
-		  "lock %d flags %s bufops %pS caller %pS",
+		  "flags %s bufops %pS caller %pS",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long long)__entry->bno,
 		  __entry->nblks,
 		  __entry->hold,
 		  __entry->pincount,
-		  __entry->lockval,
 		  __print_flags(__entry->flags, "|", XFS_BUF_FLAGS),
 		  __entry->buf_ops,
 		  (void *)__entry->caller_ip)
@@ -514,7 +511,6 @@  DECLARE_EVENT_CLASS(xfs_buf_flags_class,
 		__field(unsigned int, length)
 		__field(int, hold)
 		__field(int, pincount)
-		__field(unsigned, lockval)
 		__field(unsigned, flags)
 		__field(unsigned long, caller_ip)
 	),
@@ -525,17 +521,15 @@  DECLARE_EVENT_CLASS(xfs_buf_flags_class,
 		__entry->flags = flags;
 		__entry->hold = atomic_read(&bp->b_hold);
 		__entry->pincount = atomic_read(&bp->b_pin_count);
-		__entry->lockval = bp->b_sema.count;
 		__entry->caller_ip = caller_ip;
 	),
 	TP_printk("dev %d:%d daddr 0x%llx bbcount 0x%x hold %d pincount %d "
-		  "lock %d flags %s caller %pS",
+		  "flags %s caller %pS",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long long)__entry->bno,
 		  __entry->length,
 		  __entry->hold,
 		  __entry->pincount,
-		  __entry->lockval,
 		  __print_flags(__entry->flags, "|", XFS_BUF_FLAGS),
 		  (void *)__entry->caller_ip)
 )
@@ -558,7 +552,6 @@  TRACE_EVENT(xfs_buf_ioerror,
 		__field(unsigned, flags)
 		__field(int, hold)
 		__field(int, pincount)
-		__field(unsigned, lockval)
 		__field(int, error)
 		__field(xfs_failaddr_t, caller_ip)
 	),
@@ -568,19 +561,17 @@  TRACE_EVENT(xfs_buf_ioerror,
 		__entry->length = bp->b_length;
 		__entry->hold = atomic_read(&bp->b_hold);
 		__entry->pincount = atomic_read(&bp->b_pin_count);
-		__entry->lockval = bp->b_sema.count;
 		__entry->error = error;
 		__entry->flags = bp->b_flags;
 		__entry->caller_ip = caller_ip;
 	),
 	TP_printk("dev %d:%d daddr 0x%llx bbcount 0x%x hold %d pincount %d "
-		  "lock %d error %d flags %s caller %pS",
+		  "error %d flags %s caller %pS",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long long)__entry->bno,
 		  __entry->length,
 		  __entry->hold,
 		  __entry->pincount,
-		  __entry->lockval,
 		  __entry->error,
 		  __print_flags(__entry->flags, "|", XFS_BUF_FLAGS),
 		  (void *)__entry->caller_ip)
@@ -595,7 +586,6 @@  DECLARE_EVENT_CLASS(xfs_buf_item_class,
 		__field(unsigned int, buf_len)
 		__field(int, buf_hold)
 		__field(int, buf_pincount)
-		__field(int, buf_lockval)
 		__field(unsigned, buf_flags)
 		__field(unsigned, bli_recur)
 		__field(int, bli_refcount)
@@ -612,18 +602,16 @@  DECLARE_EVENT_CLASS(xfs_buf_item_class,
 		__entry->buf_flags = bip->bli_buf->b_flags;
 		__entry->buf_hold = atomic_read(&bip->bli_buf->b_hold);
 		__entry->buf_pincount = atomic_read(&bip->bli_buf->b_pin_count);
-		__entry->buf_lockval = bip->bli_buf->b_sema.count;
 		__entry->li_flags = bip->bli_item.li_flags;
 	),
 	TP_printk("dev %d:%d daddr 0x%llx bbcount 0x%x hold %d pincount %d "
-		  "lock %d flags %s recur %d refcount %d bliflags %s "
+		  "flags %s recur %d refcount %d bliflags %s "
 		  "liflags %s",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long long)__entry->buf_bno,
 		  __entry->buf_len,
 		  __entry->buf_hold,
 		  __entry->buf_pincount,
-		  __entry->buf_lockval,
 		  __print_flags(__entry->buf_flags, "|", XFS_BUF_FLAGS),
 		  __entry->bli_recur,
 		  __entry->bli_refcount,
@@ -4802,7 +4790,6 @@  DECLARE_EVENT_CLASS(xfbtree_buf_class,
 		__field(int, nblks)
 		__field(int, hold)
 		__field(int, pincount)
-		__field(unsigned int, lockval)
 		__field(unsigned int, flags)
 	),
 	TP_fast_assign(
@@ -4811,16 +4798,14 @@  DECLARE_EVENT_CLASS(xfbtree_buf_class,
 		__entry->nblks = bp->b_length;
 		__entry->hold = atomic_read(&bp->b_hold);
 		__entry->pincount = atomic_read(&bp->b_pin_count);
-		__entry->lockval = bp->b_sema.count;
 		__entry->flags = bp->b_flags;
 	),
-	TP_printk("xfino 0x%lx daddr 0x%llx bbcount 0x%x hold %d pincount %d lock %d flags %s",
+	TP_printk("xfino 0x%lx daddr 0x%llx bbcount 0x%x hold %d pincount %d flags %s",
 		  __entry->xfino,
 		  (unsigned long long)__entry->bno,
 		  __entry->nblks,
 		  __entry->hold,
 		  __entry->pincount,
-		  __entry->lockval,
 		  __print_flags(__entry->flags, "|", XFS_BUF_FLAGS))
 )