Message ID | 20200217184613.19668-4-willy@infradead.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Change readahead API | expand |
On Mon, Feb 17, 2020 at 10:45:44AM -0800, Matthew Wilcox wrote: > From: "Matthew Wilcox (Oracle)" <willy@infradead.org> > > In this patch, only between __do_page_cache_readahead() and > read_pages(), but it will be extended in upcoming patches. Also add > the readahead_count() accessor. > > Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org> > --- > include/linux/pagemap.h | 17 +++++++++++++++++ > mm/readahead.c | 36 +++++++++++++++++++++--------------- > 2 files changed, 38 insertions(+), 15 deletions(-) > > diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h > index ccb14b6a16b5..982ecda2d4a2 100644 > --- a/include/linux/pagemap.h > +++ b/include/linux/pagemap.h > @@ -630,6 +630,23 @@ static inline int add_to_page_cache(struct page *page, > return error; > } > > +/* > + * Readahead is of a block of consecutive pages. > + */ > +struct readahead_control { > + struct file *file; > + struct address_space *mapping; > +/* private: use the readahead_* accessors instead */ > + pgoff_t _start; > + unsigned int _nr_pages; > +}; > + > +/* The number of pages in this readahead block */ > +static inline unsigned int readahead_count(struct readahead_control *rac) > +{ > + return rac->_nr_pages; > +} > + > static inline unsigned long dir_pages(struct inode *inode) > { > return (unsigned long)(inode->i_size + PAGE_SIZE - 1) >> > diff --git a/mm/readahead.c b/mm/readahead.c > index 12d13b7792da..15329309231f 100644 > --- a/mm/readahead.c > +++ b/mm/readahead.c > @@ -113,26 +113,29 @@ int read_cache_pages(struct address_space *mapping, struct list_head *pages, > > EXPORT_SYMBOL(read_cache_pages); > > -static void read_pages(struct address_space *mapping, struct file *filp, > - struct list_head *pages, unsigned int nr_pages, gfp_t gfp) > +static void read_pages(struct readahead_control *rac, struct list_head *pages, > + gfp_t gfp) > { > + const struct address_space_operations *aops = rac->mapping->a_ops; > struct blk_plug plug; > unsigned page_idx; Splitting out 
the aops rather than the mapping here just looks weird, especially as you need the mapping later in the function. Using aops doesn't even reduce the code size.... > > blk_start_plug(&plug); > > - if (mapping->a_ops->readpages) { > - mapping->a_ops->readpages(filp, mapping, pages, nr_pages); > + if (aops->readpages) { > + aops->readpages(rac->file, rac->mapping, pages, > + readahead_count(rac)); > /* Clean up the remaining pages */ > put_pages_list(pages); > goto out; > } > > - for (page_idx = 0; page_idx < nr_pages; page_idx++) { > + for (page_idx = 0; page_idx < readahead_count(rac); page_idx++) { > struct page *page = lru_to_page(pages); > list_del(&page->lru); > - if (!add_to_page_cache_lru(page, mapping, page->index, gfp)) > - mapping->a_ops->readpage(filp, page); > + if (!add_to_page_cache_lru(page, rac->mapping, page->index, > + gfp)) > + aops->readpage(rac->file, page); ... it just makes this less readable by splitting the if() over two lines... > put_page(page); > } > > @@ -155,9 +158,13 @@ void __do_page_cache_readahead(struct address_space *mapping, > unsigned long end_index; /* The last page we want to read */ > LIST_HEAD(page_pool); > int page_idx; > - unsigned int nr_pages = 0; > loff_t isize = i_size_read(inode); > gfp_t gfp_mask = readahead_gfp_mask(mapping); > + struct readahead_control rac = { > + .mapping = mapping, > + .file = filp, > + ._nr_pages = 0, > + }; No need to initialise _nr_pages to zero, leaving it out will do the same thing. > > if (isize == 0) > return; > @@ -180,10 +187,9 @@ void __do_page_cache_readahead(struct address_space *mapping, > * contiguous pages before continuing with the next > * batch. > */ > - if (nr_pages) > - read_pages(mapping, filp, &page_pool, nr_pages, > - gfp_mask); > - nr_pages = 0; > + if (readahead_count(&rac)) > + read_pages(&rac, &page_pool, gfp_mask); > + rac._nr_pages = 0; Hmmm. 
Wondering if it makes sense to move the gfp_mask to the readahead control structure - if we have to pass the gfp_mask down all the way alongside the rac, then I think it makes sense to do that... Cheers, Dave.
On Tue, Feb 18, 2020 at 04:03:00PM +1100, Dave Chinner wrote: > On Mon, Feb 17, 2020 at 10:45:44AM -0800, Matthew Wilcox wrote: > > +static void read_pages(struct readahead_control *rac, struct list_head *pages, > > + gfp_t gfp) > > { > > + const struct address_space_operations *aops = rac->mapping->a_ops; > > struct blk_plug plug; > > unsigned page_idx; > > Splitting out the aops rather than the mapping here just looks > weird, especially as you need the mapping later in the function. > Using aops doesn't even reduce the code side.... It does in subsequent patches ... I agree it looks a little weird here, but I think in the final form, it makes sense: static void read_pages(struct readahead_control *rac, struct list_head *pages) { const struct address_space_operations *aops = rac->mapping->a_ops; struct page *page; struct blk_plug plug; blk_start_plug(&plug); if (aops->readahead) { aops->readahead(rac); readahead_for_each(rac, page) { unlock_page(page); put_page(page); } } else if (aops->readpages) { aops->readpages(rac->file, rac->mapping, pages, readahead_count(rac)); /* Clean up the remaining pages */ put_pages_list(pages); } else { readahead_for_each(rac, page) { aops->readpage(rac->file, page); put_page(page); } } blk_finish_plug(&plug); } It'll look even better once ->readpages goes away. > > @@ -155,9 +158,13 @@ void __do_page_cache_readahead(struct address_space *mapping, > > unsigned long end_index; /* The last page we want to read */ > > LIST_HEAD(page_pool); > > int page_idx; > > - unsigned int nr_pages = 0; > > loff_t isize = i_size_read(inode); > > gfp_t gfp_mask = readahead_gfp_mask(mapping); > > + struct readahead_control rac = { > > + .mapping = mapping, > > + .file = filp, > > + ._nr_pages = 0, > > + }; > > No need to initialise _nr_pages to zero, leaving it out will do the > same thing. Yes, it does, but I wanted to make it explicit here. 
> > + if (readahead_count(&rac)) > > + read_pages(&rac, &page_pool, gfp_mask); > > + rac._nr_pages = 0; > > Hmmm. Wondering ig it make sense to move the gfp_mask to the readahead > control structure - if we have to pass the gfp_mask down all the > way along side the rac, then I think it makes sense to do that... So we end up removing it later on in this series, but I do wonder if it would make sense anyway. By the end of the series, we still have this in iomap: if (ctx->rac) /* same as readahead_gfp_mask */ gfp |= __GFP_NORETRY | __GFP_NOWARN; and we could get rid of that by passing gfp flags down in the rac. On the other hand, I don't know why it doesn't just use readahead_gfp_mask() here anyway ... Christoph?
On 2/17/20 10:45 AM, Matthew Wilcox wrote: > From: "Matthew Wilcox (Oracle)" <willy@infradead.org> > > In this patch, only between __do_page_cache_readahead() and > read_pages(), but it will be extended in upcoming patches. Also add > the readahead_count() accessor. > > Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org> > --- > include/linux/pagemap.h | 17 +++++++++++++++++ > mm/readahead.c | 36 +++++++++++++++++++++--------------- > 2 files changed, 38 insertions(+), 15 deletions(-) > > diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h > index ccb14b6a16b5..982ecda2d4a2 100644 > --- a/include/linux/pagemap.h > +++ b/include/linux/pagemap.h > @@ -630,6 +630,23 @@ static inline int add_to_page_cache(struct page *page, > return error; > } > > +/* > + * Readahead is of a block of consecutive pages. > + */ > +struct readahead_control { > + struct file *file; > + struct address_space *mapping; > +/* private: use the readahead_* accessors instead */ Really a minor point, sorry...what about documenting "input", "output", "input/output" instead? I ask because: a) public and private seem sort of meaningless here: even in this initial patch, the code starts off by setting .file, .mapping, and ._nr_pages. b) The part that's confusing, and that might benefit from either documentation or naming changes, is the way _nr_pages is used. Is it "number of pages requested to read ahead", or "number of pages just read", or "number of pages remaining to be read"? I've had trouble keeping it straight because I recall it being used differently at different points. > + pgoff_t _start; > + unsigned int _nr_pages; > +}; > + > +/* The number of pages in this readahead block */ > +static inline unsigned int readahead_count(struct readahead_control *rac) > +{ > + return rac->_nr_pages; > +} I took a peek at the generated code, and was reassured to see that this really does work even in the "for" loops. Once in a while I like to get my faith in the compiler renewed. 
:) > + > static inline unsigned long dir_pages(struct inode *inode) > { > return (unsigned long)(inode->i_size + PAGE_SIZE - 1) >> > diff --git a/mm/readahead.c b/mm/readahead.c > index 12d13b7792da..15329309231f 100644 > --- a/mm/readahead.c > +++ b/mm/readahead.c > @@ -113,26 +113,29 @@ int read_cache_pages(struct address_space *mapping, struct list_head *pages, > > EXPORT_SYMBOL(read_cache_pages); > > -static void read_pages(struct address_space *mapping, struct file *filp, > - struct list_head *pages, unsigned int nr_pages, gfp_t gfp) > +static void read_pages(struct readahead_control *rac, struct list_head *pages, > + gfp_t gfp) > { > + const struct address_space_operations *aops = rac->mapping->a_ops; > struct blk_plug plug; > unsigned page_idx; > > blk_start_plug(&plug); > > - if (mapping->a_ops->readpages) { > - mapping->a_ops->readpages(filp, mapping, pages, nr_pages); > + if (aops->readpages) { > + aops->readpages(rac->file, rac->mapping, pages, > + readahead_count(rac)); > /* Clean up the remaining pages */ > put_pages_list(pages); > goto out; > } > > - for (page_idx = 0; page_idx < nr_pages; page_idx++) { > + for (page_idx = 0; page_idx < readahead_count(rac); page_idx++) { > struct page *page = lru_to_page(pages); > list_del(&page->lru); > - if (!add_to_page_cache_lru(page, mapping, page->index, gfp)) > - mapping->a_ops->readpage(filp, page); > + if (!add_to_page_cache_lru(page, rac->mapping, page->index, > + gfp)) > + aops->readpage(rac->file, page); > put_page(page); > } > > @@ -155,9 +158,13 @@ void __do_page_cache_readahead(struct address_space *mapping, > unsigned long end_index; /* The last page we want to read */ > LIST_HEAD(page_pool); > int page_idx; > - unsigned int nr_pages = 0; > loff_t isize = i_size_read(inode); > gfp_t gfp_mask = readahead_gfp_mask(mapping); > + struct readahead_control rac = { > + .mapping = mapping, > + .file = filp, > + ._nr_pages = 0, > + }; > > if (isize == 0) > return; > @@ -180,10 +187,9 @@ void 
__do_page_cache_readahead(struct address_space *mapping, > * contiguous pages before continuing with the next > * batch. > */ > - if (nr_pages) > - read_pages(mapping, filp, &page_pool, nr_pages, > - gfp_mask); > - nr_pages = 0; > + if (readahead_count(&rac)) > + read_pages(&rac, &page_pool, gfp_mask); > + rac._nr_pages = 0; > continue; > } > > @@ -194,7 +200,7 @@ void __do_page_cache_readahead(struct address_space *mapping, > list_add(&page->lru, &page_pool); > if (page_idx == nr_to_read - lookahead_size) > SetPageReadahead(page); > - nr_pages++; > + rac._nr_pages++; > } > > /* > @@ -202,8 +208,8 @@ void __do_page_cache_readahead(struct address_space *mapping, > * uptodate then the caller will launch readpage again, and > * will then handle the error. > */ > - if (nr_pages) > - read_pages(mapping, filp, &page_pool, nr_pages, gfp_mask); > + if (readahead_count(&rac)) > + read_pages(&rac, &page_pool, gfp_mask); > BUG_ON(!list_empty(&page_pool)); > } > > In any case, this patch faithfully preserves the existing logic, so regardless of any documentation decisions, Reviewed-by: John Hubbard <jhubbard@nvidia.com> thanks,
On Tue, Feb 18, 2020 at 05:56:18AM -0800, Matthew Wilcox wrote: > On Tue, Feb 18, 2020 at 04:03:00PM +1100, Dave Chinner wrote: > > On Mon, Feb 17, 2020 at 10:45:44AM -0800, Matthew Wilcox wrote: > > > +static void read_pages(struct readahead_control *rac, struct list_head *pages, > > > + gfp_t gfp) > > > { > > > + const struct address_space_operations *aops = rac->mapping->a_ops; > > > struct blk_plug plug; > > > unsigned page_idx; > > > > Splitting out the aops rather than the mapping here just looks > > weird, especially as you need the mapping later in the function. > > Using aops doesn't even reduce the code side.... > > It does in subsequent patches ... I agree it looks a little weird here, > but I think in the final form, it makes sense: Ok. Perhaps just an additional commit comment to say "read_pages() is changed to be aops centric as @rac abstracts away all other implementation details by the end of the patchset." > > > + if (readahead_count(&rac)) > > > + read_pages(&rac, &page_pool, gfp_mask); > > > + rac._nr_pages = 0; > > > > Hmmm. Wondering ig it make sense to move the gfp_mask to the readahead > > control structure - if we have to pass the gfp_mask down all the > > way along side the rac, then I think it makes sense to do that... > > So we end up removing it later on in this series, but I do wonder if > it would make sense anyway. By the end of the series, we still have > this in iomap: > > if (ctx->rac) /* same as readahead_gfp_mask */ > gfp |= __GFP_NORETRY | __GFP_NOWARN; > > and we could get rid of that by passing gfp flags down in the rac. On the > other hand, I don't know why it doesn't just use readahead_gfp_mask() > here anyway ... Christoph? mapping->gfp_mask is awful. Is it a mask, or is it a valid set of allocation flags? Or both? 
Some callers to mapping_gfp_constraint() use it as a mask, some callers to mapping_gfp_constraint() use it as base flags that context specific flags get masked out of, readahead_gfp_mask() callers use it as the entire set of gfp flags for allocation. That whole API sucks - undocumented as to what it's supposed to do and how it's supposed to be used. Hence it's difficult to use correctly or understand whether it's being used correctly. And reading callers only leads to more confusion and crazy code like in do_mpage_readpage() where readahead returns a mask that is used as base flags and normal reads return a masked set of base flags... The iomap code is obviously correct when it comes to gfp flag manipulation. We start with GFP_KERNEL context, then constrain it via the mask held in mapping->gfp_mask, then if it's readahead we allow the allocation to silently fail. Simple to read and understand code, versus having weird code that requires the reader to decipher an undocumented and inconsistent API to understand how the gfp flags have been calculated and are valid. Cheers, Dave.
On Wed, Feb 19, 2020 at 09:46:10AM +1100, Dave Chinner wrote: > On Tue, Feb 18, 2020 at 05:56:18AM -0800, Matthew Wilcox wrote: > > On Tue, Feb 18, 2020 at 04:03:00PM +1100, Dave Chinner wrote: > > > On Mon, Feb 17, 2020 at 10:45:44AM -0800, Matthew Wilcox wrote: > > > > +static void read_pages(struct readahead_control *rac, struct list_head *pages, > > > > + gfp_t gfp) > > > > { > > > > + const struct address_space_operations *aops = rac->mapping->a_ops; > > > > struct blk_plug plug; > > > > unsigned page_idx; > > > > > > Splitting out the aops rather than the mapping here just looks > > > weird, especially as you need the mapping later in the function. > > > Using aops doesn't even reduce the code side.... > > > > It does in subsequent patches ... I agree it looks a little weird here, > > but I think in the final form, it makes sense: > > Ok. Perhaps just an additional commit comment to say "read_pages() is > changed to be aops centric as @rac abstracts away all other > implementation details by the end of the patchset." ACK, will add. > > > > + if (readahead_count(&rac)) > > > > + read_pages(&rac, &page_pool, gfp_mask); > > > > + rac._nr_pages = 0; > > > > > > Hmmm. Wondering ig it make sense to move the gfp_mask to the readahead > > > control structure - if we have to pass the gfp_mask down all the > > > way along side the rac, then I think it makes sense to do that... > > > > So we end up removing it later on in this series, but I do wonder if > > it would make sense anyway. By the end of the series, we still have > > this in iomap: > > > > if (ctx->rac) /* same as readahead_gfp_mask */ > > gfp |= __GFP_NORETRY | __GFP_NOWARN; > > > > and we could get rid of that by passing gfp flags down in the rac. On the > > other hand, I don't know why it doesn't just use readahead_gfp_mask() > > here anyway ... Christoph? > > mapping->gfp_mask is awful. Is it a mask, or is it a valid set of > allocation flags? Or both? 
Some callers to mapping_gfp_constraint() > uses it as a mask, some callers to mapping_gfp_constraint() use it > as base flags that context specific flags get masked out of, > readahead_gfp_mask() callers use it as the entire set of gfp flags > for allocation. > > That whole API sucks - undocumented as to what it's suposed to do > and how it's supposed to be used. Hence it's difficult to use > correctly or understand whether it's being used correctly. And > reading callers only leads to more confusion and crazy code like in > do_mpage_readpage() where readahead returns a mask that are used as > base flags and normal reads return a masked set of base flags... > > The iomap code is obviously correct when it comes to gfp flag > manipulation. We start with GFP_KERNEL context, then constrain it > via the mask held in mapping->gfp_mask, then if it's readahead we > allow the allocation to silently fail. > > Simple to read and understand code, versus having weird code that > requires the reader to decipher an undocumented and inconsistent API > to understand how the gfp flags have been calculated and are valid. I think a lot of this is not so much a criticism of mapping->gfp_mask as a criticism of the whole GFP flags concept. Some of the flags make allocations more likely to succeed, others make them more likely to fail. Some of them allow the allocator to do more things; some prevent the allocator from doing things it would otherwise do. Some of them aren't flags at all. Some of them are mutually incompatible (and will be warned about if set in combination), some of them will silently win over other flags. I think they made a certain amount of clunky sense when they were added, but they've grown to a point where they don't make sense any more and partly that's because there's nobody standing over the allocator with a flaming sword promising certain death to anyone who adds a new flag without thoroughly documenting its interactions with every other flag. 
I am no longer a fan of GFP flags ;-)
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index ccb14b6a16b5..982ecda2d4a2 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -630,6 +630,23 @@ static inline int add_to_page_cache(struct page *page, return error; } +/* + * Readahead is of a block of consecutive pages. + */ +struct readahead_control { + struct file *file; + struct address_space *mapping; +/* private: use the readahead_* accessors instead */ + pgoff_t _start; + unsigned int _nr_pages; +}; + +/* The number of pages in this readahead block */ +static inline unsigned int readahead_count(struct readahead_control *rac) +{ + return rac->_nr_pages; +} + static inline unsigned long dir_pages(struct inode *inode) { return (unsigned long)(inode->i_size + PAGE_SIZE - 1) >> diff --git a/mm/readahead.c b/mm/readahead.c index 12d13b7792da..15329309231f 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -113,26 +113,29 @@ int read_cache_pages(struct address_space *mapping, struct list_head *pages, EXPORT_SYMBOL(read_cache_pages); -static void read_pages(struct address_space *mapping, struct file *filp, - struct list_head *pages, unsigned int nr_pages, gfp_t gfp) +static void read_pages(struct readahead_control *rac, struct list_head *pages, + gfp_t gfp) { + const struct address_space_operations *aops = rac->mapping->a_ops; struct blk_plug plug; unsigned page_idx; blk_start_plug(&plug); - if (mapping->a_ops->readpages) { - mapping->a_ops->readpages(filp, mapping, pages, nr_pages); + if (aops->readpages) { + aops->readpages(rac->file, rac->mapping, pages, + readahead_count(rac)); /* Clean up the remaining pages */ put_pages_list(pages); goto out; } - for (page_idx = 0; page_idx < nr_pages; page_idx++) { + for (page_idx = 0; page_idx < readahead_count(rac); page_idx++) { struct page *page = lru_to_page(pages); list_del(&page->lru); - if (!add_to_page_cache_lru(page, mapping, page->index, gfp)) - mapping->a_ops->readpage(filp, page); + if (!add_to_page_cache_lru(page, 
rac->mapping, page->index, + gfp)) + aops->readpage(rac->file, page); put_page(page); } @@ -155,9 +158,13 @@ void __do_page_cache_readahead(struct address_space *mapping, unsigned long end_index; /* The last page we want to read */ LIST_HEAD(page_pool); int page_idx; - unsigned int nr_pages = 0; loff_t isize = i_size_read(inode); gfp_t gfp_mask = readahead_gfp_mask(mapping); + struct readahead_control rac = { + .mapping = mapping, + .file = filp, + ._nr_pages = 0, + }; if (isize == 0) return; @@ -180,10 +187,9 @@ void __do_page_cache_readahead(struct address_space *mapping, * contiguous pages before continuing with the next * batch. */ - if (nr_pages) - read_pages(mapping, filp, &page_pool, nr_pages, - gfp_mask); - nr_pages = 0; + if (readahead_count(&rac)) + read_pages(&rac, &page_pool, gfp_mask); + rac._nr_pages = 0; continue; } @@ -194,7 +200,7 @@ void __do_page_cache_readahead(struct address_space *mapping, list_add(&page->lru, &page_pool); if (page_idx == nr_to_read - lookahead_size) SetPageReadahead(page); - nr_pages++; + rac._nr_pages++; } /* @@ -202,8 +208,8 @@ void __do_page_cache_readahead(struct address_space *mapping, * uptodate then the caller will launch readpage again, and * will then handle the error. */ - if (nr_pages) - read_pages(mapping, filp, &page_pool, nr_pages, gfp_mask); + if (readahead_count(&rac)) + read_pages(&rac, &page_pool, gfp_mask); BUG_ON(!list_empty(&page_pool)); }