
[v3] fs/buffer: Make BH_Uptodate_Lock bit_spin_lock a regular spinlock_t

Message ID: 20191118132824.rclhrbujqh4b4g4d@linutronix.de (mailing list archive)
State: New, archived
Series: [v3] fs/buffer: Make BH_Uptodate_Lock bit_spin_lock a regular spinlock_t

Commit Message

Sebastian Siewior Nov. 18, 2019, 1:28 p.m. UTC
From: Thomas Gleixner <tglx@linutronix.de>

Bit spinlocks are problematic if PREEMPT_RT is enabled, because they
disable preemption, which is undesired for latency reasons and breaks when
regular spinlocks are taken within the bit_spinlock locked region because
regular spinlocks are converted to 'sleeping spinlocks' on RT. So RT
replaces the bit spinlocks with regular spinlocks to avoid this problem.
Bit spinlocks are also not covered by lock debugging, e.g. lockdep.

Substitute the BH_Uptodate_Lock bit spinlock with a regular spinlock.
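
The transformation at each call site is mechanical. Condensed from the
hunks below, the open-coded IRQ-off plus bit spinlock sequence

	local_irq_save(flags);
	bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
	...
	bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
	local_irq_restore(flags);

becomes a single IRQ-safe lock operation that lockdep can see:

	spin_lock_irqsave(&first->b_uptodate_lock, flags);
	...
	spin_unlock_irqrestore(&first->b_uptodate_lock, flags);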

Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
[bigeasy: remove the wrapper and use always spinlock_t and move it into
          the padding hole]
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
v2…v3: rename uptodate_lock to b_uptodate_lock.

v1…v2: Move the spinlock_t into the padding hole, as per Jan Kara. pahole says
the total size remained unchanged; before:

| atomic_t                   b_count;              /*    96     4 */
|
| /* size: 104, cachelines: 2, members: 12 */
| /* padding: 4 */
| /* last cacheline: 40 bytes */

after:

| atomic_t                   b_count;              /*    96     4 */
| spinlock_t                 uptodate_lock;        /*   100     4 */
|
| /* size: 104, cachelines: 2, members: 13 */
| /* last cacheline: 40 bytes */
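
The unchanged size is pure alignment: b_count ends at offset 100, and the
8-byte alignment of the 64-bit struct leaves a 4-byte hole up to 104, which
the 4-byte spinlock_t (its size without lockdep or spinlock debugging
enabled) fills. A toy userspace model shows the same effect; the struct
names and members here are illustrative, not the kernel definition:

	#include <assert.h>
	#include <stdio.h>

	struct bh_before { void *ptr; int b_count; };			/* 4 bytes tail padding on LP64 */
	struct bh_after { void *ptr; int b_count; int uptodate_lock; };	/* new member fills that padding */

	int main(void)
	{
		printf("before=%zu after=%zu\n",
		       sizeof(struct bh_before), sizeof(struct bh_after));
		assert(sizeof(struct bh_before) == sizeof(struct bh_after));	/* both 16 on LP64 */
		return 0;
	}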

 fs/buffer.c                 | 19 +++++++------------
 fs/ext4/page-io.c           |  8 +++-----
 fs/ntfs/aops.c              |  9 +++------
 include/linux/buffer_head.h |  6 +++---
 4 files changed, 16 insertions(+), 26 deletions(-)

Comments

Jan Kara Nov. 19, 2019, 9:30 a.m. UTC | #1
On Mon 18-11-19 14:28:24, Sebastian Siewior wrote:
> From: Thomas Gleixner <tglx@linutronix.de>
> 
> Bit spinlocks are problematic if PREEMPT_RT is enabled, because they
> disable preemption, which is undesired for latency reasons and breaks when
> regular spinlocks are taken within the bit_spinlock locked region because
> regular spinlocks are converted to 'sleeping spinlocks' on RT. So RT
> replaces the bit spinlocks with regular spinlocks to avoid this problem.
> Bit spinlocks are also not covered by lock debugging, e.g. lockdep.
> 
> Substitute the BH_Uptodate_Lock bit spinlock with a regular spinlock.
> 
> Reviewed-by: Jan Kara <jack@suse.cz>
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> [bigeasy: remove the wrapper and use always spinlock_t and move it into
>           the padding hole]
> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>

OK, how do we push this? Do you plan to push this through tip tree?

								Honza

Sebastian Siewior Nov. 19, 2019, 10 a.m. UTC | #2
On 2019-11-19 10:30:01 [+0100], Jan Kara wrote:
> OK, how do we push this? Do you plan to push this through tip tree?

Is it reasonable to push this via the ext4 tree?

> 								Honza

Sebastian
Jan Kara Nov. 19, 2019, 11:59 a.m. UTC | #3
On Tue 19-11-19 11:00:22, Sebastian Siewior wrote:
> On 2019-11-19 10:30:01 [+0100], Jan Kara wrote:
> > OK, how do we push this? Do you plan to push this through tip tree?
> 
> Is it reasonable to push this via the ext4 tree?

I don't know, and I don't think it matters much. That code is rarely
touched. Lately, changes to it have been flowing through the block tree
(Jens Axboe) or the iomap tree (Darrick Wong), because those were the
people needing the changes in...

								Honza

Patch

diff --git a/fs/buffer.c b/fs/buffer.c
index 86a38b9793235..4baea587981e0 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -275,8 +275,7 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
 	 * decide that the page is now completely done.
 	 */
 	first = page_buffers(page);
-	local_irq_save(flags);
-	bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
+	spin_lock_irqsave(&first->b_uptodate_lock, flags);
 	clear_buffer_async_read(bh);
 	unlock_buffer(bh);
 	tmp = bh;
@@ -289,8 +288,7 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
 		}
 		tmp = tmp->b_this_page;
 	} while (tmp != bh);
-	bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
-	local_irq_restore(flags);
+	spin_unlock_irqrestore(&first->b_uptodate_lock, flags);
 
 	/*
 	 * If none of the buffers had errors and they are all
@@ -302,8 +300,7 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
 	return;
 
 still_busy:
-	bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
-	local_irq_restore(flags);
+	spin_unlock_irqrestore(&first->b_uptodate_lock, flags);
 	return;
 }
 
@@ -331,8 +328,7 @@ void end_buffer_async_write(struct buffer_head *bh, int uptodate)
 	}
 
 	first = page_buffers(page);
-	local_irq_save(flags);
-	bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
+	spin_lock_irqsave(&first->b_uptodate_lock, flags);
 
 	clear_buffer_async_write(bh);
 	unlock_buffer(bh);
@@ -344,14 +340,12 @@ void end_buffer_async_write(struct buffer_head *bh, int uptodate)
 		}
 		tmp = tmp->b_this_page;
 	}
-	bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
-	local_irq_restore(flags);
+	spin_unlock_irqrestore(&first->b_uptodate_lock, flags);
 	end_page_writeback(page);
 	return;
 
 still_busy:
-	bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
-	local_irq_restore(flags);
+	spin_unlock_irqrestore(&first->b_uptodate_lock, flags);
 	return;
 }
 EXPORT_SYMBOL(end_buffer_async_write);
@@ -3368,6 +3362,7 @@ struct buffer_head *alloc_buffer_head(gfp_t gfp_flags)
 	struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags);
 	if (ret) {
 		INIT_LIST_HEAD(&ret->b_assoc_buffers);
+		spin_lock_init(&ret->b_uptodate_lock);
 		preempt_disable();
 		__this_cpu_inc(bh_accounting.nr);
 		recalc_bh_state();
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 12ceadef32c5a..64d4c06fbf836 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -87,11 +87,10 @@ static void ext4_finish_bio(struct bio *bio)
 		}
 		bh = head = page_buffers(page);
 		/*
-		 * We check all buffers in the page under BH_Uptodate_Lock
+		 * We check all buffers in the page under b_uptodate_lock
 		 * to avoid races with other end io clearing async_write flags
 		 */
-		local_irq_save(flags);
-		bit_spin_lock(BH_Uptodate_Lock, &head->b_state);
+		spin_lock_irqsave(&head->b_uptodate_lock, flags);
 		do {
 			if (bh_offset(bh) < bio_start ||
 			    bh_offset(bh) + bh->b_size > bio_end) {
@@ -103,8 +102,7 @@ static void ext4_finish_bio(struct bio *bio)
 			if (bio->bi_status)
 				buffer_io_error(bh);
 		} while ((bh = bh->b_this_page) != head);
-		bit_spin_unlock(BH_Uptodate_Lock, &head->b_state);
-		local_irq_restore(flags);
+		spin_unlock_irqrestore(&head->b_uptodate_lock, flags);
 		if (!under_io) {
 			fscrypt_free_bounce_page(bounce_page);
 			end_page_writeback(page);
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
index 7202a1e39d70c..554b744f41bf8 100644
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -92,8 +92,7 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
 				"0x%llx.", (unsigned long long)bh->b_blocknr);
 	}
 	first = page_buffers(page);
-	local_irq_save(flags);
-	bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
+	spin_lock_irqsave(&first->b_uptodate_lock, flags);
 	clear_buffer_async_read(bh);
 	unlock_buffer(bh);
 	tmp = bh;
@@ -108,8 +107,7 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
 		}
 		tmp = tmp->b_this_page;
 	} while (tmp != bh);
-	bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
-	local_irq_restore(flags);
+	spin_unlock_irqrestore(&first->b_uptodate_lock, flags);
 	/*
 	 * If none of the buffers had errors then we can set the page uptodate,
 	 * but we first have to perform the post read mst fixups, if the
@@ -142,8 +140,7 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
 	unlock_page(page);
 	return;
 still_busy:
-	bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
-	local_irq_restore(flags);
+	spin_unlock_irqrestore(&first->b_uptodate_lock, flags);
 	return;
 }
 
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 7b73ef7f902d4..e0b020eaf32e2 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -22,9 +22,6 @@ enum bh_state_bits {
 	BH_Dirty,	/* Is dirty */
 	BH_Lock,	/* Is locked */
 	BH_Req,		/* Has been submitted for I/O */
-	BH_Uptodate_Lock,/* Used by the first bh in a page, to serialise
-			  * IO completion of other buffers in the page
-			  */
 
 	BH_Mapped,	/* Has a disk mapping */
 	BH_New,		/* Disk mapping was newly created by get_block */
@@ -76,6 +73,9 @@ struct buffer_head {
 	struct address_space *b_assoc_map;	/* mapping this buffer is
 						   associated with */
 	atomic_t b_count;		/* users using this buffer_head */
+	spinlock_t b_uptodate_lock;	/* Used by the first bh in a page, to
+					 * serialise IO completion of other
+					 * buffers in the page */
 };
 
 /*