diff mbox series

[v2,4/8] fs/mpage: use blocks_per_folio instead of blocks_per_page

Message ID 20250204231209.429356-5-mcgrof@kernel.org (mailing list archive)
State New
Headers show
Series enable bs > ps for block devices | expand

Commit Message

Luis Chamberlain Feb. 4, 2025, 11:12 p.m. UTC
From: Hannes Reinecke <hare@suse.de>

Convert mpage to folios and associate the number of blocks with
a folio and not a page.

[mcgrof: keep 1 page request on mpage_read_folio()]
Signed-off-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
---
 fs/mpage.c | 38 +++++++++++++++++++-------------------
 1 file changed, 19 insertions(+), 19 deletions(-)

Comments

Matthew Wilcox Feb. 17, 2025, 9:58 p.m. UTC | #1
On Tue, Feb 04, 2025 at 03:12:05PM -0800, Luis Chamberlain wrote:
> @@ -182,7 +182,7 @@ static struct bio *do_mpage_readpage(struct mpage_readpage_args *args)
>  		goto confused;
>  
>  	block_in_file = folio_pos(folio) >> blkbits;
> -	last_block = block_in_file + args->nr_pages * blocks_per_page;
> +	last_block = block_in_file + args->nr_pages * blocks_per_folio;

In mpage_readahead(), we set args->nr_pages to the number of pages (not
folios) being requested.  In mpage_read_folio() we currently set it to
1.  So this is going to read too far ahead for readahead if using large
folios.

I think we need to make nr_pages continue to mean nr_pages.  Or we pass
in nr_bytes or nr_blocks.
Hannes Reinecke Feb. 18, 2025, 3:02 p.m. UTC | #2
On 2/17/25 22:58, Matthew Wilcox wrote:
> On Tue, Feb 04, 2025 at 03:12:05PM -0800, Luis Chamberlain wrote:
>> @@ -182,7 +182,7 @@ static struct bio *do_mpage_readpage(struct mpage_readpage_args *args)
>>   		goto confused;
>>   
>>   	block_in_file = folio_pos(folio) >> blkbits;
>> -	last_block = block_in_file + args->nr_pages * blocks_per_page;
>> +	last_block = block_in_file + args->nr_pages * blocks_per_folio;
> 
> In mpage_readahead(), we set args->nr_pages to the number of pages (not
> folios) being requested.  In mpage_read_folio() we currently set it to
> 1.  So this is going to read too far ahead for readahead if using large
> folios.
> 
> I think we need to make nr_pages continue to mean nr_pages.  Or we pass
> in nr_bytes or nr_blocks.
> 
I had been pondering this, too, while developing the patch.
The idea I had here was to change counting by pages over to counting by 
folios, as then the logic is essentially unchanged.

Not a big fan of 'nr_pages', as then the question really is how much
data we should read at the end of the day. So I'd rather go with 
'nr_blocks' to avoid any confusion.

Cheers,

Hannes
Luis Chamberlain Feb. 21, 2025, 6:58 p.m. UTC | #3
On Tue, Feb 18, 2025 at 04:02:43PM +0100, Hannes Reinecke wrote:
> On 2/17/25 22:58, Matthew Wilcox wrote:
> > On Tue, Feb 04, 2025 at 03:12:05PM -0800, Luis Chamberlain wrote:
> > > @@ -182,7 +182,7 @@ static struct bio *do_mpage_readpage(struct mpage_readpage_args *args)
> > >   		goto confused;
> > >   	block_in_file = folio_pos(folio) >> blkbits;
> > > -	last_block = block_in_file + args->nr_pages * blocks_per_page;
> > > +	last_block = block_in_file + args->nr_pages * blocks_per_folio;
> > 
> > In mpage_readahead(), we set args->nr_pages to the number of pages (not
> > folios) being requested.  In mpage_read_folio() we currently set it to
> > 1.  So this is going to read too far ahead for readahead if using large
> > folios.
> > 
> > I think we need to make nr_pages continue to mean nr_pages.  Or we pass
> > in nr_bytes or nr_blocks.
> > 
> I had been pondering this, too, while developing the patch.
> The idea I had here was to change counting by pages over to counting by
> folios, as then the logic is essentially unchanged.
> 
> Not a big fan of 'nr_pages', as then the question really is how much
> data we should read at the end of the day. So I'd rather go with 'nr_blocks'
> to avoid any confusion.

I think the easier answer is to adjust nr_pages in terms of min-order
requirements and fix last_block computation so we don't lie for large
folios as follows. While at it, I noticed a folio_zero_segment() was
missing folio_size().

diff --git a/fs/mpage.c b/fs/mpage.c
index c17d7a724e4b..624bf30f0b2e 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -152,6 +152,7 @@ static struct bio *do_mpage_readpage(struct mpage_readpage_args *args)
 {
 	struct folio *folio = args->folio;
 	struct inode *inode = folio->mapping->host;
+	const unsigned min_nrpages = mapping_min_folio_nrpages(folio->mapping);
 	const unsigned blkbits = inode->i_blkbits;
 	const unsigned blocks_per_folio = folio_size(folio) >> blkbits;
 	const unsigned blocksize = 1 << blkbits;
@@ -172,6 +173,8 @@ static struct bio *do_mpage_readpage(struct mpage_readpage_args *args)
 
 	/* MAX_BUF_PER_PAGE, for example */
 	VM_BUG_ON_FOLIO(folio_test_large(folio), folio);
+	VM_BUG_ON_FOLIO(args->nr_pages < min_nrpages, folio);
+	VM_BUG_ON_FOLIO(!IS_ALIGNED(args->nr_pages, min_nrpages), folio);
 
 	if (args->is_readahead) {
 		opf |= REQ_RAHEAD;
@@ -182,7 +185,7 @@ static struct bio *do_mpage_readpage(struct mpage_readpage_args *args)
 		goto confused;
 
 	block_in_file = folio_pos(folio) >> blkbits;
-	last_block = block_in_file + args->nr_pages * blocks_per_folio;
+	last_block = block_in_file + ((args->nr_pages * PAGE_SIZE) >> blkbits);
 	last_block_in_file = (i_size_read(inode) + blocksize - 1) >> blkbits;
 	if (last_block > last_block_in_file)
 		last_block = last_block_in_file;
@@ -269,7 +272,7 @@ static struct bio *do_mpage_readpage(struct mpage_readpage_args *args)
 	}
 
 	if (first_hole != blocks_per_folio) {
-		folio_zero_segment(folio, first_hole << blkbits, PAGE_SIZE);
+		folio_zero_segment(folio, first_hole << blkbits, folio_size(folio));
 		if (first_hole == 0) {
 			folio_mark_uptodate(folio);
 			folio_unlock(folio);
@@ -385,7 +388,7 @@ int mpage_read_folio(struct folio *folio, get_block_t get_block)
 {
 	struct mpage_readpage_args args = {
 		.folio = folio,
-		.nr_pages = 1,
+		.nr_pages = mapping_min_folio_nrpages(folio->mapping),
 		.get_block = get_block,
 	};
Matthew Wilcox Feb. 21, 2025, 8:25 p.m. UTC | #4
On Fri, Feb 21, 2025 at 10:58:58AM -0800, Luis Chamberlain wrote:
> @@ -385,7 +388,7 @@ int mpage_read_folio(struct folio *folio, get_block_t get_block)
>  {
>  	struct mpage_readpage_args args = {
>  		.folio = folio,
> -		.nr_pages = 1,
> +		.nr_pages = mapping_min_folio_nrpages(folio->mapping),

		.nr_pages = folio_nr_pages(folio);

since the folio is not necessarily the minimum size.

>  		.get_block = get_block,
>  	};
>
Matthew Wilcox Feb. 21, 2025, 8:27 p.m. UTC | #5
On Fri, Feb 21, 2025 at 10:58:58AM -0800, Luis Chamberlain wrote:
> +++ b/fs/mpage.c
> @@ -152,6 +152,7 @@ static struct bio *do_mpage_readpage(struct mpage_readpage_args *args)
>  {
>  	struct folio *folio = args->folio;
>  	struct inode *inode = folio->mapping->host;
> +	const unsigned min_nrpages = mapping_min_folio_nrpages(folio->mapping);
>  	const unsigned blkbits = inode->i_blkbits;
>  	const unsigned blocks_per_folio = folio_size(folio) >> blkbits;
>  	const unsigned blocksize = 1 << blkbits;
> @@ -172,6 +173,8 @@ static struct bio *do_mpage_readpage(struct mpage_readpage_args *args)
>  
>  	/* MAX_BUF_PER_PAGE, for example */
>  	VM_BUG_ON_FOLIO(folio_test_large(folio), folio);
> +	VM_BUG_ON_FOLIO(args->nr_pages < min_nrpages, folio);
> +	VM_BUG_ON_FOLIO(!IS_ALIGNED(args->nr_pages, min_nrpages), folio);
>  
>  	if (args->is_readahead) {
>  		opf |= REQ_RAHEAD;

Also, I don't think these assertions add any value; we already assert
these things are true in other places.
Luis Chamberlain Feb. 21, 2025, 8:38 p.m. UTC | #6
On Fri, Feb 21, 2025 at 08:25:11PM +0000, Matthew Wilcox wrote:
> On Fri, Feb 21, 2025 at 10:58:58AM -0800, Luis Chamberlain wrote:
> > @@ -385,7 +388,7 @@ int mpage_read_folio(struct folio *folio, get_block_t get_block)
> >  {
> >  	struct mpage_readpage_args args = {
> >  		.folio = folio,
> > -		.nr_pages = 1,
> > +		.nr_pages = mapping_min_folio_nrpages(folio->mapping),
> 
> 		.nr_pages = folio_nr_pages(folio);
> 
> since the folio is not necessarily the minimum size.

Will roll this in for tests before a new v3.

 Luis
Luis Chamberlain Feb. 21, 2025, 8:39 p.m. UTC | #7
On Fri, Feb 21, 2025 at 08:27:17PM +0000, Matthew Wilcox wrote:
> On Fri, Feb 21, 2025 at 10:58:58AM -0800, Luis Chamberlain wrote:
> > +++ b/fs/mpage.c
> > @@ -152,6 +152,7 @@ static struct bio *do_mpage_readpage(struct mpage_readpage_args *args)
> >  {
> >  	struct folio *folio = args->folio;
> >  	struct inode *inode = folio->mapping->host;
> > +	const unsigned min_nrpages = mapping_min_folio_nrpages(folio->mapping);
> >  	const unsigned blkbits = inode->i_blkbits;
> >  	const unsigned blocks_per_folio = folio_size(folio) >> blkbits;
> >  	const unsigned blocksize = 1 << blkbits;
> > @@ -172,6 +173,8 @@ static struct bio *do_mpage_readpage(struct mpage_readpage_args *args)
> >  
> >  	/* MAX_BUF_PER_PAGE, for example */
> >  	VM_BUG_ON_FOLIO(folio_test_large(folio), folio);
> > +	VM_BUG_ON_FOLIO(args->nr_pages < min_nrpages, folio);
> > +	VM_BUG_ON_FOLIO(!IS_ALIGNED(args->nr_pages, min_nrpages), folio);
> >  
> >  	if (args->is_readahead) {
> >  		opf |= REQ_RAHEAD;
> 
> Also, I don't think these assertions add any value; we already assert
> these things are true in other places.

Sure, it may not have been clear to others, but that doesn't mean we
need to be explicit about it in code; the commit log can justify this
alone. Will remove.

  Luis
diff mbox series

Patch

diff --git a/fs/mpage.c b/fs/mpage.c
index a3c82206977f..c17d7a724e4b 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -107,7 +107,7 @@  static void map_buffer_to_folio(struct folio *folio, struct buffer_head *bh,
 		 * don't make any buffers if there is only one buffer on
 		 * the folio and the folio just needs to be set up to date
 		 */
-		if (inode->i_blkbits == PAGE_SHIFT &&
+		if (inode->i_blkbits == folio_shift(folio) &&
 		    buffer_uptodate(bh)) {
 			folio_mark_uptodate(folio);
 			return;
@@ -153,7 +153,7 @@  static struct bio *do_mpage_readpage(struct mpage_readpage_args *args)
 	struct folio *folio = args->folio;
 	struct inode *inode = folio->mapping->host;
 	const unsigned blkbits = inode->i_blkbits;
-	const unsigned blocks_per_page = PAGE_SIZE >> blkbits;
+	const unsigned blocks_per_folio = folio_size(folio) >> blkbits;
 	const unsigned blocksize = 1 << blkbits;
 	struct buffer_head *map_bh = &args->map_bh;
 	sector_t block_in_file;
@@ -161,7 +161,7 @@  static struct bio *do_mpage_readpage(struct mpage_readpage_args *args)
 	sector_t last_block_in_file;
 	sector_t first_block;
 	unsigned page_block;
-	unsigned first_hole = blocks_per_page;
+	unsigned first_hole = blocks_per_folio;
 	struct block_device *bdev = NULL;
 	int length;
 	int fully_mapped = 1;
@@ -182,7 +182,7 @@  static struct bio *do_mpage_readpage(struct mpage_readpage_args *args)
 		goto confused;
 
 	block_in_file = folio_pos(folio) >> blkbits;
-	last_block = block_in_file + args->nr_pages * blocks_per_page;
+	last_block = block_in_file + args->nr_pages * blocks_per_folio;
 	last_block_in_file = (i_size_read(inode) + blocksize - 1) >> blkbits;
 	if (last_block > last_block_in_file)
 		last_block = last_block_in_file;
@@ -204,7 +204,7 @@  static struct bio *do_mpage_readpage(struct mpage_readpage_args *args)
 				clear_buffer_mapped(map_bh);
 				break;
 			}
-			if (page_block == blocks_per_page)
+			if (page_block == blocks_per_folio)
 				break;
 			page_block++;
 			block_in_file++;
@@ -216,7 +216,7 @@  static struct bio *do_mpage_readpage(struct mpage_readpage_args *args)
 	 * Then do more get_blocks calls until we are done with this folio.
 	 */
 	map_bh->b_folio = folio;
-	while (page_block < blocks_per_page) {
+	while (page_block < blocks_per_folio) {
 		map_bh->b_state = 0;
 		map_bh->b_size = 0;
 
@@ -229,7 +229,7 @@  static struct bio *do_mpage_readpage(struct mpage_readpage_args *args)
 
 		if (!buffer_mapped(map_bh)) {
 			fully_mapped = 0;
-			if (first_hole == blocks_per_page)
+			if (first_hole == blocks_per_folio)
 				first_hole = page_block;
 			page_block++;
 			block_in_file++;
@@ -247,7 +247,7 @@  static struct bio *do_mpage_readpage(struct mpage_readpage_args *args)
 			goto confused;
 		}
 	
-		if (first_hole != blocks_per_page)
+		if (first_hole != blocks_per_folio)
 			goto confused;		/* hole -> non-hole */
 
 		/* Contiguous blocks? */
@@ -260,7 +260,7 @@  static struct bio *do_mpage_readpage(struct mpage_readpage_args *args)
 			if (relative_block == nblocks) {
 				clear_buffer_mapped(map_bh);
 				break;
-			} else if (page_block == blocks_per_page)
+			} else if (page_block == blocks_per_folio)
 				break;
 			page_block++;
 			block_in_file++;
@@ -268,7 +268,7 @@  static struct bio *do_mpage_readpage(struct mpage_readpage_args *args)
 		bdev = map_bh->b_bdev;
 	}
 
-	if (first_hole != blocks_per_page) {
+	if (first_hole != blocks_per_folio) {
 		folio_zero_segment(folio, first_hole << blkbits, PAGE_SIZE);
 		if (first_hole == 0) {
 			folio_mark_uptodate(folio);
@@ -303,10 +303,10 @@  static struct bio *do_mpage_readpage(struct mpage_readpage_args *args)
 	relative_block = block_in_file - args->first_logical_block;
 	nblocks = map_bh->b_size >> blkbits;
 	if ((buffer_boundary(map_bh) && relative_block == nblocks) ||
-	    (first_hole != blocks_per_page))
+	    (first_hole != blocks_per_folio))
 		args->bio = mpage_bio_submit_read(args->bio);
 	else
-		args->last_block_in_bio = first_block + blocks_per_page - 1;
+		args->last_block_in_bio = first_block + blocks_per_folio - 1;
 out:
 	return args->bio;
 
@@ -456,12 +456,12 @@  static int __mpage_writepage(struct folio *folio, struct writeback_control *wbc,
 	struct address_space *mapping = folio->mapping;
 	struct inode *inode = mapping->host;
 	const unsigned blkbits = inode->i_blkbits;
-	const unsigned blocks_per_page = PAGE_SIZE >> blkbits;
+	const unsigned blocks_per_folio = folio_size(folio) >> blkbits;
 	sector_t last_block;
 	sector_t block_in_file;
 	sector_t first_block;
 	unsigned page_block;
-	unsigned first_unmapped = blocks_per_page;
+	unsigned first_unmapped = blocks_per_folio;
 	struct block_device *bdev = NULL;
 	int boundary = 0;
 	sector_t boundary_block = 0;
@@ -486,12 +486,12 @@  static int __mpage_writepage(struct folio *folio, struct writeback_control *wbc,
 				 */
 				if (buffer_dirty(bh))
 					goto confused;
-				if (first_unmapped == blocks_per_page)
+				if (first_unmapped == blocks_per_folio)
 					first_unmapped = page_block;
 				continue;
 			}
 
-			if (first_unmapped != blocks_per_page)
+			if (first_unmapped != blocks_per_folio)
 				goto confused;	/* hole -> non-hole */
 
 			if (!buffer_dirty(bh) || !buffer_uptodate(bh))
@@ -536,7 +536,7 @@  static int __mpage_writepage(struct folio *folio, struct writeback_control *wbc,
 		goto page_is_mapped;
 	last_block = (i_size - 1) >> blkbits;
 	map_bh.b_folio = folio;
-	for (page_block = 0; page_block < blocks_per_page; ) {
+	for (page_block = 0; page_block < blocks_per_folio; ) {
 
 		map_bh.b_state = 0;
 		map_bh.b_size = 1 << blkbits;
@@ -618,14 +618,14 @@  static int __mpage_writepage(struct folio *folio, struct writeback_control *wbc,
 	BUG_ON(folio_test_writeback(folio));
 	folio_start_writeback(folio);
 	folio_unlock(folio);
-	if (boundary || (first_unmapped != blocks_per_page)) {
+	if (boundary || (first_unmapped != blocks_per_folio)) {
 		bio = mpage_bio_submit_write(bio);
 		if (boundary_block) {
 			write_boundary_block(boundary_bdev,
 					boundary_block, 1 << blkbits);
 		}
 	} else {
-		mpd->last_block_in_bio = first_block + blocks_per_page - 1;
+		mpd->last_block_in_bio = first_block + blocks_per_folio - 1;
 	}
 	goto out;