
[2/4] fs/buffer: Move BH_Uptodate_Lock locking into wrapper functions

Message ID 20190730120321.285095769@linutronix.de (mailing list archive)
State New, archived
Series fs: Substitute bit-spinlocks for PREEMPT_RT and debugging

Commit Message

Thomas Gleixner July 30, 2019, 11:24 a.m. UTC
Bit spinlocks are problematic if PREEMPT_RT is enabled. They disable
preemption, which is undesired for latency reasons, and they break when
regular spinlocks are taken within the bit_spinlock locked region, because
regular spinlocks are converted to 'sleeping spinlocks' on RT. RT therefore
replaces the bit spinlocks with regular spinlocks to avoid this problem.

To avoid ifdeffery at the source level, wrap all BH_Uptodate_Lock bitlock
operations in inline functions, so the spinlock substitution can be done
in one place.

The regular-spinlock variant can also be enabled purely for lock debugging,
so that the lock operations become visible to lockdep.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: "Theodore Ts'o" <tytso@mit.edu>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: linux-fsdevel@vger.kernel.org
---
 fs/buffer.c                 |   20 ++++++--------------
 fs/ext4/page-io.c           |    6 ++----
 fs/ntfs/aops.c              |   10 +++-------
 include/linux/buffer_head.h |   16 ++++++++++++++++
 4 files changed, 27 insertions(+), 25 deletions(-)
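
For illustration, here is a minimal sketch of what the substitution these
wrappers enable could look like when PREEMPT_RT or bit spinlock debugging
is enabled. It assumes struct buffer_head gains a regular spinlock; the
field name b_uptodate_lock and the CONFIG_DEBUG_BIT_SPINLOCKS symbol are
hypothetical placeholders, not taken from this patch:

#if defined(CONFIG_PREEMPT_RT) || defined(CONFIG_DEBUG_BIT_SPINLOCKS)

/*
 * Substituted variant: a regular spinlock_t, which is safe on RT and
 * visible to lockdep. Assumes a hypothetical spinlock_t b_uptodate_lock
 * member in struct buffer_head, initialized at allocation time.
 */
static inline unsigned long bh_uptodate_lock_irqsave(struct buffer_head *bh)
{
	unsigned long flags;

	spin_lock_irqsave(&bh->b_uptodate_lock, flags);
	return flags;
}

static inline void
bh_uptodate_unlock_irqrestore(struct buffer_head *bh, unsigned long flags)
{
	spin_unlock_irqrestore(&bh->b_uptodate_lock, flags);
}

#else

/* Default variant: the bit spinlock in bh->b_state, as in this patch. */
static inline unsigned long bh_uptodate_lock_irqsave(struct buffer_head *bh)
{
	unsigned long flags;

	local_irq_save(flags);
	bit_spin_lock(BH_Uptodate_Lock, &bh->b_state);
	return flags;
}

static inline void
bh_uptodate_unlock_irqrestore(struct buffer_head *bh, unsigned long flags)
{
	bit_spin_unlock(BH_Uptodate_Lock, &bh->b_state);
	local_irq_restore(flags);
}

#endif

Either way, the call sites in fs/buffer.c, fs/ext4/page-io.c and
fs/ntfs/aops.c stay identical; only the wrapper bodies differ.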

Comments

Jan Kara July 31, 2019, 2:46 p.m. UTC | #1
On Tue 30-07-19 13:24:54, Thomas Gleixner wrote:
> Bit spinlocks are problematic if PREEMPT_RT is enabled. They disable
> preemption, which is undesired for latency reasons, and they break when
> regular spinlocks are taken within the bit_spinlock locked region, because
> regular spinlocks are converted to 'sleeping spinlocks' on RT. RT therefore
> replaces the bit spinlocks with regular spinlocks to avoid this problem.
> 
> To avoid ifdeffery at the source level, wrap all BH_Uptodate_Lock bitlock
> operations in inline functions, so the spinlock substitution can be done
> in one place.
> 
> The regular-spinlock variant can also be enabled purely for lock debugging,
> so that the lock operations become visible to lockdep.
> 
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> Cc: "Theodore Ts'o" <tytso@mit.edu>
> Cc: Matthew Wilcox <willy@infradead.org>
> Cc: Alexander Viro <viro@zeniv.linux.org.uk>
> Cc: linux-fsdevel@vger.kernel.org

Looks good to me. You can add:

Reviewed-by: Jan Kara <jack@suse.cz>

BTW, it should be possible to get rid of BH_Uptodate_Lock altogether by
using bio chaining (which was non-existent when this bh code was written)
to make sure the IO completion function gets called only once all the bios
used to fill in / write out the page are done. It would also be more
efficient. But I guess that's an interesting cleanup project for some other
time...

								Honza

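As a rough, hypothetical sketch of the bio chaining idea above (not part of
this series; the helper names are invented for illustration): give each
buffer_head its own bio and chain them all to a parent bio, so the
page-level completion handler runs exactly once and no BH_Uptodate_Lock is
needed. Buffer locking, error handling and allocation failures are omitted:

#include <linux/bio.h>
#include <linux/buffer_head.h>
#include <linux/pagemap.h>

static void page_read_end_io(struct bio *parent)
{
	struct page *page = parent->bi_private;

	/* Runs once, after every chained child bio has completed. */
	if (parent->bi_status)
		SetPageError(page);
	else
		SetPageUptodate(page);
	unlock_page(page);
	bio_put(parent);
}

static void read_page_buffers(struct page *page)
{
	struct buffer_head *head = page_buffers(page), *bh = head;
	struct bio *parent = bio_alloc(GFP_NOIO, 0);

	parent->bi_end_io = page_read_end_io;
	parent->bi_private = page;

	do {
		struct bio *bio = bio_alloc(GFP_NOIO, 1);

		bio_set_dev(bio, bh->b_bdev);
		bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
		bio->bi_opf = REQ_OP_READ;
		bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
		/* Defer the parent's completion until this bio is done. */
		bio_chain(bio, parent);
		submit_bio(bio);
	} while ((bh = bh->b_this_page) != head);

	/*
	 * Drop the parent's initial completion reference; its end_io
	 * fires once all chained children have finished.
	 */
	bio_endio(parent);
}

With this pattern the per-buffer bookkeeping under BH_Uptodate_Lock
disappears entirely; the page-level result is decided in one place.
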
Thomas Gleixner July 31, 2019, 10:27 p.m. UTC | #2
On Wed, 31 Jul 2019, Jan Kara wrote:
> Looks good to me. You can add:
> 
> Reviewed-by: Jan Kara <jack@suse.cz>
> 
> BTW, it should be possible to get rid of BH_Uptodate_Lock altogether by
> using bio chaining (which was non-existent when this bh code was written)
> to make sure the IO completion function gets called only once all the bios
> used to fill in / write out the page are done. It would also be more
> efficient. But I guess that's an interesting cleanup project for some other
> time...

While 'possible cleanup' is something which triggers a certain nerve, that
particular project certainly goes beyond my basic understanding of that
whole fs/block conglomerate. I'd rather leave that to people who actually
have a clue. :)

Thanks,

	tglx

Patch

--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -275,8 +275,7 @@  static void end_buffer_async_read(struct
 	 * decide that the page is now completely done.
 	 */
 	first = page_buffers(page);
-	local_irq_save(flags);
-	bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
+	flags = bh_uptodate_lock_irqsave(first);
 	clear_buffer_async_read(bh);
 	unlock_buffer(bh);
 	tmp = bh;
@@ -289,8 +288,7 @@  static void end_buffer_async_read(struct
 		}
 		tmp = tmp->b_this_page;
 	} while (tmp != bh);
-	bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
-	local_irq_restore(flags);
+	bh_uptodate_unlock_irqrestore(first, flags);
 
 	/*
 	 * If none of the buffers had errors and they are all
@@ -302,9 +300,7 @@  static void end_buffer_async_read(struct
 	return;
 
 still_busy:
-	bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
-	local_irq_restore(flags);
-	return;
+	bh_uptodate_unlock_irqrestore(first, flags);
 }
 
 /*
@@ -331,8 +327,7 @@  void end_buffer_async_write(struct buffe
 	}
 
 	first = page_buffers(page);
-	local_irq_save(flags);
-	bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
+	flags = bh_uptodate_lock_irqsave(first);
 
 	clear_buffer_async_write(bh);
 	unlock_buffer(bh);
@@ -344,15 +339,12 @@  void end_buffer_async_write(struct buffe
 		}
 		tmp = tmp->b_this_page;
 	}
-	bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
-	local_irq_restore(flags);
+	bh_uptodate_unlock_irqrestore(first, flags);
 	end_page_writeback(page);
 	return;
 
 still_busy:
-	bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
-	local_irq_restore(flags);
-	return;
+	bh_uptodate_unlock_irqrestore(first, flags);
 }
 EXPORT_SYMBOL(end_buffer_async_write);
 
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -90,8 +90,7 @@  static void ext4_finish_bio(struct bio *
 		 * We check all buffers in the page under BH_Uptodate_Lock
 		 * to avoid races with other end io clearing async_write flags
 		 */
-		local_irq_save(flags);
-		bit_spin_lock(BH_Uptodate_Lock, &head->b_state);
+		flags = bh_uptodate_lock_irqsave(head);
 		do {
 			if (bh_offset(bh) < bio_start ||
 			    bh_offset(bh) + bh->b_size > bio_end) {
@@ -103,8 +102,7 @@  static void ext4_finish_bio(struct bio *
 			if (bio->bi_status)
 				buffer_io_error(bh);
 		} while ((bh = bh->b_this_page) != head);
-		bit_spin_unlock(BH_Uptodate_Lock, &head->b_state);
-		local_irq_restore(flags);
+		bh_uptodate_unlock_irqrestore(head, flags);
 		if (!under_io) {
 			fscrypt_free_bounce_page(bounce_page);
 			end_page_writeback(page);
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -92,8 +92,7 @@  static void ntfs_end_buffer_async_read(s
 				"0x%llx.", (unsigned long long)bh->b_blocknr);
 	}
 	first = page_buffers(page);
-	local_irq_save(flags);
-	bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
+	flags = bh_uptodate_lock_irqsave(first);
 	clear_buffer_async_read(bh);
 	unlock_buffer(bh);
 	tmp = bh;
@@ -108,8 +107,7 @@  static void ntfs_end_buffer_async_read(s
 		}
 		tmp = tmp->b_this_page;
 	} while (tmp != bh);
-	bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
-	local_irq_restore(flags);
+	bh_uptodate_unlock_irqrestore(first, flags);
 	/*
 	 * If none of the buffers had errors then we can set the page uptodate,
 	 * but we first have to perform the post read mst fixups, if the
@@ -142,9 +140,7 @@  static void ntfs_end_buffer_async_read(s
 	unlock_page(page);
 	return;
 still_busy:
-	bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
-	local_irq_restore(flags);
-	return;
+	bh_uptodate_unlock_irqrestore(first, flags);
 }
 
 /**
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -78,6 +78,22 @@  struct buffer_head {
 	atomic_t b_count;		/* users using this buffer_head */
 };
 
+static inline unsigned long bh_uptodate_lock_irqsave(struct buffer_head *bh)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	bit_spin_lock(BH_Uptodate_Lock, &bh->b_state);
+	return flags;
+}
+
+static inline void
+bh_uptodate_unlock_irqrestore(struct buffer_head *bh, unsigned long flags)
+{
+	bit_spin_unlock(BH_Uptodate_Lock, &bh->b_state);
+	local_irq_restore(flags);
+}
+
 /*
  * macro tricks to expand the set_buffer_foo(), clear_buffer_foo()
  * and buffer_foo() functions.