Message ID | 20240226094936.2677493-4-kernel@pankajraghav.com (mailing list archive)
---|---
State | New
Series | enable bs > ps in XFS
On Mon, Feb 26, 2024 at 10:49:26AM +0100, Pankaj Raghav (Samsung) wrote:
> From: Luis Chamberlain <mcgrof@kernel.org>
>
> Supporting mapping_min_order implies that we guarantee each folio in the
> page cache has at least an order of mapping_min_order. So when adding new
> folios to the page cache we must ensure the index used is aligned to the
> mapping_min_order as the page cache requires the index to be aligned to
> the order of the folio.

This seems like a remarkably complicated way of achieving:

diff --git a/mm/filemap.c b/mm/filemap.c
index 5603ced05fb7..36105dad4440 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2427,9 +2427,11 @@ static int filemap_update_page(struct kiocb *iocb,
 }
 
 static int filemap_create_folio(struct file *file,
-		struct address_space *mapping, pgoff_t index,
+		struct address_space *mapping, loff_t pos,
 		struct folio_batch *fbatch)
 {
+	pgoff_t index;
+	unsigned int min_order;
 	struct folio *folio;
 	int error;
 
@@ -2451,6 +2453,8 @@ static int filemap_create_folio(struct file *file,
 	 * well to keep locking rules simple.
 	 */
 	filemap_invalidate_lock_shared(mapping);
+	min_order = mapping_min_folio_order(mapping);
+	index = (pos >> (min_order + PAGE_SHIFT)) << min_order;
 	error = filemap_add_folio(mapping, folio, index,
 			mapping_gfp_constraint(mapping, GFP_KERNEL));
 	if (error == -EEXIST)
@@ -2511,8 +2515,7 @@ static int filemap_get_pages(struct kiocb *iocb, size_t count,
 	if (!folio_batch_count(fbatch)) {
 		if (iocb->ki_flags & (IOCB_NOWAIT | IOCB_WAITQ))
 			return -EAGAIN;
-		err = filemap_create_folio(filp, mapping,
-				iocb->ki_pos >> PAGE_SHIFT, fbatch);
+		err = filemap_create_folio(filp, mapping, iocb->ki_pos, fbatch);
 		if (err == AOP_TRUNCATED_PAGE)
 			goto retry;
 		return err;
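To make the shift math concrete: it is a round-down of the byte position to the minimum folio size, expressed in page-cache index units. A standalone userspace sketch (the 4 KiB PAGE_SHIFT and the min_order value are assumptions for illustration, not from the patch) showing it agrees with a plain mask-based round-down:

```c
/* Illustration only, not kernel code. Assumes 4 KiB pages
 * (PAGE_SHIFT = 12) and a minimum folio order of 2 (16 KiB). */
#include <assert.h>
#include <stdint.h>

#define PAGE_SHIFT 12

int main(void)
{
	uint64_t pos = 0x5000;		/* byte offset 20480, page index 5 */
	unsigned int min_order = 2;	/* minimum folio = 4 pages */

	/* The formulation from the diff above: shift away the bits below
	 * the minimum folio size, then scale back up to page-index units. */
	uint64_t index = (pos >> (min_order + PAGE_SHIFT)) << min_order;

	/* Equivalent: round the page index down to a folio boundary. */
	uint64_t aligned = (pos >> PAGE_SHIFT) & ~((1ULL << min_order) - 1);

	assert(index == aligned && index == 4);	/* page 5 rounds down to 4 */
	return 0;
}
```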
On Mon, Feb 26, 2024 at 02:40:42PM +0000, Matthew Wilcox wrote:
> On Mon, Feb 26, 2024 at 10:49:26AM +0100, Pankaj Raghav (Samsung) wrote:
> > From: Luis Chamberlain <mcgrof@kernel.org>
> >
> > Supporting mapping_min_order implies that we guarantee each folio in the
> > page cache has at least an order of mapping_min_order. So when adding new
> > folios to the page cache we must ensure the index used is aligned to the
> > mapping_min_order as the page cache requires the index to be aligned to
> > the order of the folio.
>
> This seems like a remarkably complicated way of achieving:
>
[...]
> @@ -2451,6 +2453,8 @@ static int filemap_create_folio(struct file *file,
>  	 * well to keep locking rules simple.
>  	 */
>  	filemap_invalidate_lock_shared(mapping);
> +	min_order = mapping_min_folio_order(mapping);
> +	index = (pos >> (min_order + PAGE_SHIFT)) << min_order;

That is some cool mathfu. I will add a comment here as it might not be
that obvious to some people (i.e. me). Thanks.

>  	error = filemap_add_folio(mapping, folio, index,
>  			mapping_gfp_constraint(mapping, GFP_KERNEL));
[...]
On Tue, Feb 27, 2024 at 11:06:37AM +0100, Pankaj Raghav (Samsung) wrote:
> On Mon, Feb 26, 2024 at 02:40:42PM +0000, Matthew Wilcox wrote:
> > On Mon, Feb 26, 2024 at 10:49:26AM +0100, Pankaj Raghav (Samsung) wrote:
> > > Supporting mapping_min_order implies that we guarantee each folio in the
> > > page cache has at least an order of mapping_min_order. [...]
> >
> > This seems like a remarkably complicated way of achieving:
> >
[...]
> > +	min_order = mapping_min_folio_order(mapping);
> > +	index = (pos >> (min_order + PAGE_SHIFT)) << min_order;
>
> That is some cool mathfu. I will add a comment here as it might not be
> that obvious to some people (i.e. me).

you guys are both wrong, just use rounddown()
On Tue, Feb 27, 2024 at 11:22:24AM -0500, Kent Overstreet wrote:
> On Tue, Feb 27, 2024 at 11:06:37AM +0100, Pankaj Raghav (Samsung) wrote:
> > On Mon, Feb 26, 2024 at 02:40:42PM +0000, Matthew Wilcox wrote:
[...]
> > > +	min_order = mapping_min_folio_order(mapping);
> > > +	index = (pos >> (min_order + PAGE_SHIFT)) << min_order;
> >
> > That is some cool mathfu. I will add a comment here as it might not be
> > that obvious to some people (i.e. me).
>
> you guys are both wrong, just use rounddown()

Umm, what do you mean just use rounddown? rounddown to ...?

We need to get an index that is in PAGE units but aligned to min_order
pages.

The original patch did this:

	index = mapping_align_start_index(mapping, iocb->ki_pos >> PAGE_SHIFT);

which is essentially a rounddown operation (probably this is what you
are suggesting?).

So what willy is proposing will do the same. To me, what I proposed is
less complicated, but to willy it is the other way around.
On Tue, Feb 27, 2024 at 05:36:09PM +0100, Pankaj Raghav (Samsung) wrote:
> On Tue, Feb 27, 2024 at 11:22:24AM -0500, Kent Overstreet wrote:
> > you guys are both wrong, just use rounddown()
>
> Umm, what do you mean just use rounddown? rounddown to ...?
>
> We need to get an index that is in PAGE units but aligned to min_order
> pages.
>
> The original patch did this:
>
> 	index = mapping_align_start_index(mapping, iocb->ki_pos >> PAGE_SHIFT);
>
> which is essentially a rounddown operation (probably this is what you
> are suggesting?).
>
> So what willy is proposing will do the same. To me, what I proposed is
> less complicated, but to willy it is the other way around.

Ok, I just found the code for mapping_align_start_index() - it is just a
round_down().

Never mind; the patch looks fine (aside from perhaps some quibbling over
whether the round_down() should be done before calling readahead or
within readahead; I think that might have been more what willy was
keying in on).
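For readers who have not seen it, mapping_align_start_index() from the series boils down to something like the sketch below (a reconstruction from this thread, not the verbatim patch code; mapping_min_folio_order() is the accessor the series adds). Note the kernel has both rounddown(), for arbitrary divisors, and round_down(), for power-of-two divisors; since 1 << min_order is always a power of two, round_down() is the natural fit:

```c
/* Sketch reconstructed from the discussion above; not the verbatim
 * series code. round_down() is the power-of-two helper from
 * include/linux/math.h. */
static inline pgoff_t mapping_align_start_index(struct address_space *mapping,
						pgoff_t index)
{
	unsigned int min_order = mapping_min_folio_order(mapping);

	/* Align the page-cache index down to a min_order folio boundary,
	 * so any folio added at this index can be of at least that order. */
	return round_down(index, 1UL << min_order);
}
```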
> > > you guys are both wrong, just use rounddown()
> >
> > Umm, what do you mean just use rounddown? rounddown to ...?
> >
[...]
>
> Ok, I just found the code for mapping_align_start_index() - it is just a
> round_down().
>
> Never mind; the patch looks fine (aside from perhaps some quibbling over
> whether the round_down() should be done before calling readahead or
> within readahead; I think that might have been more what willy was
> keying in on).

Yeah, exactly.

I have one question while I have you here.

When we have this support in the page cache, do you think bcachefs can
make use of it to enable bs > ps in bcachefs, as it already makes use of
large folios?

Do you think it is just a simple mapping_set_large_folios ->
mapping_set_folio_min_order(.., block_size order), or does it require
more effort?
On Tue, Feb 27, 2024 at 05:55:35PM +0100, Pankaj Raghav (Samsung) wrote:
[...]
> Yeah, exactly.
>
> I have one question while I have you here.
>
> When we have this support in the page cache, do you think bcachefs can
> make use of it to enable bs > ps in bcachefs, as it already makes use of
> large folios?

Yes, of course.

> Do you think it is just a simple mapping_set_large_folios ->
> mapping_set_folio_min_order(.., block_size order), or does it require
> more effort?

I think that's all that would be required. There's very little in the
way of references to PAGE_SIZE in bcachefs.
> > I have one question while I have you here.
> >
> > When we have this support in the page cache, do you think bcachefs can
> > make use of it to enable bs > ps in bcachefs, as it already makes use of
> > large folios?
>
> Yes, of course.
>
> > Do you think it is just a simple mapping_set_large_folios ->
> > mapping_set_folio_min_order(.., block_size order), or does it require
> > more effort?
>
> I think that's all that would be required. There's very little in the
> way of references to PAGE_SIZE in bcachefs.

Sweet. I will take a look at it once we get this upstream.

--
Pankaj
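For context, the bcachefs conversion being discussed would plausibly look like the sketch below at mapping-setup time. mapping_set_folio_min_order() is the name floated in this thread, and the surrounding function is hypothetical; the point is only that the large-folio opt-in becomes a minimum-order declaration when the block size exceeds the page size:

```c
/* Hypothetical sketch of the conversion discussed above; the helper
 * names follow the thread and are not a final API. */
static void fs_setup_mapping(struct address_space *mapping,
			     unsigned int blkbits)
{
	if (blkbits > PAGE_SHIFT)
		/* bs > ps: every folio must span at least one block. */
		mapping_set_folio_min_order(mapping, blkbits - PAGE_SHIFT);
	else
		/* bs <= ps: large folios stay a pure optimisation. */
		mapping_set_large_folios(mapping);
}
```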
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index fc8eb9c94e9c..fe8e1fbb667d 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -1328,6 +1328,14 @@ struct readahead_control {
 		._index = i,						\
 	}
 
+#define DEFINE_READAHEAD_ALIGNED(ractl, f, r, m, i)			\
+	struct readahead_control ractl = {				\
+		.file = f,						\
+		.mapping = m,						\
+		.ra = r,						\
+		._index = mapping_align_start_index(m, i),		\
+	}
+
 #define VM_READAHEAD_PAGES	(SZ_128K / PAGE_SIZE)
 
 void page_cache_ra_unbounded(struct readahead_control *,
diff --git a/mm/filemap.c b/mm/filemap.c
index 2b00442b9d19..bdf4f65f597c 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2478,11 +2478,11 @@ static int filemap_get_pages(struct kiocb *iocb, size_t count,
 	struct file *filp = iocb->ki_filp;
 	struct address_space *mapping = filp->f_mapping;
 	struct file_ra_state *ra = &filp->f_ra;
-	pgoff_t index = iocb->ki_pos >> PAGE_SHIFT;
-	pgoff_t last_index;
+	pgoff_t index, last_index;
 	struct folio *folio;
 	int err = 0;
 
+	index = mapping_align_start_index(mapping, iocb->ki_pos >> PAGE_SHIFT);
 	/* "last_index" is the index of the page beyond the end of the read */
 	last_index = DIV_ROUND_UP(iocb->ki_pos + count, PAGE_SIZE);
 retry:
@@ -2500,8 +2500,7 @@ static int filemap_get_pages(struct kiocb *iocb, size_t count,
 	if (!folio_batch_count(fbatch)) {
 		if (iocb->ki_flags & (IOCB_NOWAIT | IOCB_WAITQ))
 			return -EAGAIN;
-		err = filemap_create_folio(filp, mapping,
-				iocb->ki_pos >> PAGE_SHIFT, fbatch);
+		err = filemap_create_folio(filp, mapping, index, fbatch);
 		if (err == AOP_TRUNCATED_PAGE)
 			goto retry;
 		return err;
@@ -3093,7 +3092,7 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
 	struct file *file = vmf->vma->vm_file;
 	struct file_ra_state *ra = &file->f_ra;
 	struct address_space *mapping = file->f_mapping;
-	DEFINE_READAHEAD(ractl, file, ra, mapping, vmf->pgoff);
+	DEFINE_READAHEAD_ALIGNED(ractl, file, ra, mapping, vmf->pgoff);
 	struct file *fpin = NULL;
 	unsigned long vm_flags = vmf->vma->vm_flags;
 	unsigned int mmap_miss;
@@ -3147,7 +3146,7 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
 	ra->start = max_t(long, 0, vmf->pgoff - ra->ra_pages / 2);
 	ra->size = ra->ra_pages;
 	ra->async_size = ra->ra_pages / 4;
-	ractl._index = ra->start;
+	ractl._index = mapping_align_start_index(mapping, ra->start);
 	page_cache_ra_order(&ractl, ra, 0);
 	return fpin;
 }
@@ -3162,7 +3161,7 @@ static struct file *do_async_mmap_readahead(struct vm_fault *vmf,
 {
 	struct file *file = vmf->vma->vm_file;
 	struct file_ra_state *ra = &file->f_ra;
-	DEFINE_READAHEAD(ractl, file, ra, file->f_mapping, vmf->pgoff);
+	DEFINE_READAHEAD_ALIGNED(ractl, file, ra, file->f_mapping, vmf->pgoff);
 	struct file *fpin = NULL;
 	unsigned int mmap_miss;
@@ -3211,11 +3210,12 @@ vm_fault_t filemap_fault(struct vm_fault *vmf)
 	struct file *fpin = NULL;
 	struct address_space *mapping = file->f_mapping;
 	struct inode *inode = mapping->host;
-	pgoff_t max_idx, index = vmf->pgoff;
+	pgoff_t max_idx, index;
 	struct folio *folio;
 	vm_fault_t ret = 0;
 	bool mapping_locked = false;
 
+	index = mapping_align_start_index(mapping, vmf->pgoff);
 	max_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
 	if (unlikely(index >= max_idx))
 		return VM_FAULT_SIGBUS;
@@ -3321,7 +3321,10 @@ vm_fault_t filemap_fault(struct vm_fault *vmf)
 		return VM_FAULT_SIGBUS;
 	}
 
-	vmf->page = folio_file_page(folio, index);
+	VM_BUG_ON_FOLIO(folio_order(folio) < mapping_min_folio_order(mapping),
+			folio);
+
+	vmf->page = folio_file_page(folio, vmf->pgoff);
 	return ret | VM_FAULT_LOCKED;
 
 page_not_uptodate:
@@ -3657,6 +3660,7 @@ static struct folio *do_read_cache_folio(struct address_space *mapping,
 	struct folio *folio;
 	int err;
 
+	index = mapping_align_start_index(mapping, index);
 	if (!filler)
 		filler = mapping->a_ops->read_folio;
 repeat:
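One property worth spelling out about the filemap_fault() hunks: the lookup index is aligned down, but the faulting page is still resolved with the original vmf->pgoff, which necessarily lands inside the folio found at the aligned index. A standalone check of that invariant (the constants are assumptions for illustration, not from the patch):

```c
/* Standalone userspace check, not kernel code: with a minimum folio
 * order of min_order, rounding any page offset down to a folio
 * boundary leaves the original offset within that folio. */
#include <assert.h>
#include <stdint.h>

int main(void)
{
	const unsigned int min_order = 2;	/* e.g. 16 KiB folios, 4 KiB pages */
	const uint64_t nr = 1ULL << min_order;	/* pages per minimum folio */

	for (uint64_t pgoff = 0; pgoff < 1024; pgoff++) {
		/* What mapping_align_start_index() computes. */
		uint64_t index = pgoff & ~(nr - 1);

		assert(index % nr == 0);	/* lookup index is aligned */
		assert(pgoff - index < nr);	/* pgoff stays inside the folio */
	}
	return 0;
}
```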