| Message ID | 20200217184613.19668-12-willy@infradead.org (mailing list archive) |
|---|---|
| State | New, archived |
| Series | Change readahead API |
On Mon, Feb 17, 2020 at 10:45:52AM -0800, Matthew Wilcox wrote:
> From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
>
> At allocation time, put the pages in the cache unless we're using
> ->readpages. Add the readahead_for_each() iterator for the benefit of
> the ->readpage fallback. This iterator supports huge pages, even though
> none of the filesystems to be converted do yet.

This could be better written - took me some time to get my head
around it and the code.

"When populating the page cache for readahead, mappings that don't
use ->readpages need to have their pages added to the page cache
before ->readpage is called. Do this insertion earlier so that the
pages can be looked up immediately prior to ->readpage calls rather
than passing them on a linked list. This early insert functionality
is also required by the upcoming ->readahead method that will
replace ->readpages.

Optimise and simplify the readpage loop by adding a
readahead_for_each() iterator to provide the pages we need to read.
This iterator also supports huge pages, even though none of the
filesystems have been converted to use them yet."

> +static inline struct page *readahead_page(struct readahead_control *rac)
> +{
> +	struct page *page;
> +
> +	if (!rac->_nr_pages)
> +		return NULL;

Hmmmm.

> +
> +	page = xa_load(&rac->mapping->i_pages, rac->_start);
> +	VM_BUG_ON_PAGE(!PageLocked(page), page);
> +	rac->_batch_count = hpage_nr_pages(page);

So we could have rac->_nr_pages = 2, and then we get an order 2
large page returned, and so rac->_batch_count = 4.

> +
> +	return page;
> +}
> +
> +static inline void readahead_next(struct readahead_control *rac)
> +{
> +	rac->_nr_pages -= rac->_batch_count;
> +	rac->_start += rac->_batch_count;

This results in rac->_nr_pages = -2 (or a huge positive number).
That means that readahead_page() will not terminate when it should,
and potentially will panic if it doesn't find the page that it
thinks should be there at rac->_start + 4...

> +#define readahead_for_each(rac, page)					\
> +	for (; (page = readahead_page(rac)); readahead_next(rac))
> +
>  /* The number of pages in this readahead block */
>  static inline unsigned int readahead_count(struct readahead_control *rac)
>  {
> diff --git a/mm/readahead.c b/mm/readahead.c
> index bdc5759000d3..9e430daae42f 100644
> --- a/mm/readahead.c
> +++ b/mm/readahead.c
> @@ -113,12 +113,11 @@ int read_cache_pages(struct address_space *mapping, struct list_head *pages,
>
>  EXPORT_SYMBOL(read_cache_pages);
>
> -static void read_pages(struct readahead_control *rac, struct list_head *pages,
> -		gfp_t gfp)
> +static void read_pages(struct readahead_control *rac, struct list_head *pages)
>  {
>  	const struct address_space_operations *aops = rac->mapping->a_ops;
> +	struct page *page;
>  	struct blk_plug plug;
> -	unsigned page_idx;
>
>  	blk_start_plug(&plug);
>
> @@ -127,19 +126,13 @@ static void read_pages(struct readahead_control *rac, struct list_head *pages,
>  				readahead_count(rac));
>  		/* Clean up the remaining pages */
>  		put_pages_list(pages);
> -		goto out;
> -	}
> -
> -	for (page_idx = 0; page_idx < readahead_count(rac); page_idx++) {
> -		struct page *page = lru_to_page(pages);
> -		list_del(&page->lru);
> -		if (!add_to_page_cache_lru(page, rac->mapping, page->index,
> -				gfp))
> +	} else {
> +		readahead_for_each(rac, page) {
>  			aops->readpage(rac->file, page);
> -		put_page(page);
> +			put_page(page);
> +		}
>  	}

Nice simplification and gets rid of the need for rac->mapping, but
I still find the aops variable weird.

> -out:
>  	blk_finish_plug(&plug);
>  }
>
> @@ -159,6 +152,7 @@ void __do_page_cache_readahead(struct address_space *mapping,
>  	unsigned long i;
>  	loff_t isize = i_size_read(inode);
>  	gfp_t gfp_mask = readahead_gfp_mask(mapping);
> +	bool use_list = mapping->a_ops->readpages;
>  	struct readahead_control rac = {
>  		.mapping = mapping,
>  		.file = filp,

[ I do find these unstructured mixes of declarations and
initialisations dense and difficult to read.... ]

> @@ -196,8 +190,14 @@ void __do_page_cache_readahead(struct address_space *mapping,
>  		page = __page_cache_alloc(gfp_mask);
>  		if (!page)
>  			break;
> -		page->index = offset;
> -		list_add(&page->lru, &page_pool);
> +		if (use_list) {
> +			page->index = offset;
> +			list_add(&page->lru, &page_pool);
> +		} else if (add_to_page_cache_lru(page, mapping, offset,
> +					gfp_mask) < 0) {
> +			put_page(page);
> +			goto read;
> +		}

Ok, so that's why you put read code at the end of the loop. To turn
the code into spaghetti :/

How much does this simplify down when we get rid of ->readpages and
can restructure the loop? This really seems like you're trying to
flatten two nested loops into one by the use of goto....

Cheers,

Dave.
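[ To make the wrap-around concern above concrete: _nr_pages is an
unsigned int, so the subtraction in readahead_next() wraps rather than
going negative. A minimal userspace sketch of the scenario Dave
describes - the struct stands in for the two readahead_control fields
involved; this is illustrative, not kernel code:

#include <stdio.h>

/* Stands in for the _nr_pages/_batch_count fields of readahead_control. */
struct rac_sketch {
	unsigned int nr_pages;		/* pages left in the readahead window */
	unsigned int batch_count;	/* pages consumed by the last lookup */
};

int main(void)
{
	struct rac_sketch rac = { .nr_pages = 2 };

	/* An order-2 page makes hpage_nr_pages() return 4 ... */
	rac.batch_count = 4;

	/* ... and readahead_next()'s subtraction then wraps: */
	rac.nr_pages -= rac.batch_count;

	/* Prints 4294967294, not -2, so the loop keeps iterating. */
	printf("%u\n", rac.nr_pages);
	return 0;
}
]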
On Tue, Feb 18, 2020 at 05:14:59PM +1100, Dave Chinner wrote:
> On Mon, Feb 17, 2020 at 10:45:52AM -0800, Matthew Wilcox wrote:
> > From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
> >
> > At allocation time, put the pages in the cache unless we're using
> > ->readpages. Add the readahead_for_each() iterator for the benefit of
> > the ->readpage fallback. This iterator supports huge pages, even though
> > none of the filesystems to be converted do yet.
>
> This could be better written - took me some time to get my head
> around it and the code.
>
> "When populating the page cache for readahead, mappings that don't
> use ->readpages need to have their pages added to the page cache
> before ->readpage is called. Do this insertion earlier so that the
> pages can be looked up immediately prior to ->readpage calls rather
> than passing them on a linked list. This early insert functionality
> is also required by the upcoming ->readahead method that will
> replace ->readpages.
>
> Optimise and simplify the readpage loop by adding a
> readahead_for_each() iterator to provide the pages we need to read.
> This iterator also supports huge pages, even though none of the
> filesystems have been converted to use them yet."

Thanks, I'll use that.

> > +static inline struct page *readahead_page(struct readahead_control *rac)
> > +{
> > +	struct page *page;
> > +
> > +	if (!rac->_nr_pages)
> > +		return NULL;
>
> Hmmmm.
>
> > +
> > +	page = xa_load(&rac->mapping->i_pages, rac->_start);
> > +	VM_BUG_ON_PAGE(!PageLocked(page), page);
> > +	rac->_batch_count = hpage_nr_pages(page);
>
> So we could have rac->_nr_pages = 2, and then we get an order 2
> large page returned, and so rac->_batch_count = 4.

Well, no, we couldn't.  rac->_nr_pages is incremented by 4 when we add
an order-2 page to the readahead.  I can put a

	BUG_ON(rac->_batch_count > rac->_nr_pages)

in here to be sure to catch any logic error like that.

> > @@ -159,6 +152,7 @@ void __do_page_cache_readahead(struct address_space *mapping,
> >  	unsigned long i;
> >  	loff_t isize = i_size_read(inode);
> >  	gfp_t gfp_mask = readahead_gfp_mask(mapping);
> > +	bool use_list = mapping->a_ops->readpages;
> >  	struct readahead_control rac = {
> >  		.mapping = mapping,
> >  		.file = filp,
>
> [ I do find these unstructured mixes of declarations and
> initialisations dense and difficult to read.... ]

Fair ... although I didn't create this mess, I can tidy it up a bit.

> > -		page->index = offset;
> > -		list_add(&page->lru, &page_pool);
> > +		if (use_list) {
> > +			page->index = offset;
> > +			list_add(&page->lru, &page_pool);
> > +		} else if (add_to_page_cache_lru(page, mapping, offset,
> > +					gfp_mask) < 0) {
> > +			put_page(page);
> > +			goto read;
> > +		}
>
> Ok, so that's why you put read code at the end of the loop. To turn
> the code into spaghetti :/
>
> How much does this simplify down when we get rid of ->readpages and
> can restructure the loop? This really seems like you're trying to
> flatten two nested loops into one by the use of goto....

I see it as having two failure cases in this loop.  One for "page is
already present" (which already existed) and one for "allocated a page,
but failed to add it to the page cache" (which used to be done later).
I didn't want to duplicate the "call read_pages()" code.  So I
reshuffled the code rather than add a nested loop.  I don't think the
nested loop is easier to read (we'll be at 5 levels of indentation for
some statements).  Could do it this way ...

@@ -218,18 +218,17 @@ void page_cache_readahead_limit(struct address_space *mapping,
 		} else if (add_to_page_cache_lru(page, mapping, offset,
 					gfp_mask) < 0) {
 			put_page(page);
-			goto read;
+read:
+			if (readahead_count(&rac))
+				read_pages(&rac, &page_pool);
+			rac._nr_pages = 0;
+			rac._start = ++offset;
+			continue;
 		}
 		if (i == nr_to_read - lookahead_size)
 			SetPageReadahead(page);
 		rac._nr_pages++;
 		offset++;
-		continue;
-read:
-		if (readahead_count(&rac))
-			read_pages(&rac, &page_pool);
-		rac._nr_pages = 0;
-		rac._start = ++offset;
 	}

 /*

but I'm not sure that's any better.
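[ For reference, the nested-loop restructure being rejected here would
look roughly like the sketch below. This is not code from the thread:
the list path and most error handling are elided, and it exists only
to show the extra indentation a batch-growing inner loop forces:

	while (offset < end) {
		rac._start = offset;
		rac._nr_pages = 0;
		/* inner loop: grow one contiguous batch of new pages */
		while (offset < end) {
			struct page *page = __page_cache_alloc(gfp_mask);

			if (!page || add_to_page_cache_lru(page, mapping,
						offset, gfp_mask) != 0) {
				if (page)
					put_page(page);
				break;		/* batch ends here */
			}
			rac._nr_pages++;
			offset++;
		}
		/* outer loop: submit the batch, step past the bad page */
		if (readahead_count(&rac))
			read_pages(&rac, &page_pool);
		offset++;
	}
]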
On 2/17/20 10:45 AM, Matthew Wilcox wrote:
> From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
>
> At allocation time, put the pages in the cache unless we're using
> ->readpages. Add the readahead_for_each() iterator for the benefit of
> the ->readpage fallback. This iterator supports huge pages, even though
> none of the filesystems to be converted do yet.
>

"Also, remove the gfp argument from read_pages(), now that read_pages()
no longer does allocation."

Generally looks accurate, just a few notes below:

> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---
>  include/linux/pagemap.h | 24 ++++++++++++++++++++++++
>  mm/readahead.c          | 34 +++++++++++++++++-----------------
>  2 files changed, 41 insertions(+), 17 deletions(-)
>
> diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
> index 982ecda2d4a2..3613154e79e4 100644
> --- a/include/linux/pagemap.h
> +++ b/include/linux/pagemap.h
> @@ -639,8 +639,32 @@ struct readahead_control {
>  /* private: use the readahead_* accessors instead */
>  	pgoff_t _start;
>  	unsigned int _nr_pages;
> +	unsigned int _batch_count;
>  };
>
> +static inline struct page *readahead_page(struct readahead_control *rac)
> +{
> +	struct page *page;
> +
> +	if (!rac->_nr_pages)
> +		return NULL;
> +
> +	page = xa_load(&rac->mapping->i_pages, rac->_start);
> +	VM_BUG_ON_PAGE(!PageLocked(page), page);
> +	rac->_batch_count = hpage_nr_pages(page);
> +
> +	return page;
> +}
> +
> +static inline void readahead_next(struct readahead_control *rac)
> +{
> +	rac->_nr_pages -= rac->_batch_count;
> +	rac->_start += rac->_batch_count;
> +}
> +
> +#define readahead_for_each(rac, page)					\
> +	for (; (page = readahead_page(rac)); readahead_next(rac))
> +

How about this instead? It uses the "for" loop fully and more naturally,
and is easier to read. And it does the same thing:

static inline struct page *readahead_page(struct readahead_control *rac)
{
	struct page *page;

	if (!rac->_nr_pages)
		return NULL;

	page = xa_load(&rac->mapping->i_pages, rac->_start);
	VM_BUG_ON_PAGE(!PageLocked(page), page);
	rac->_batch_count = hpage_nr_pages(page);

	return page;
}

static inline struct page *readahead_next(struct readahead_control *rac)
{
	rac->_nr_pages -= rac->_batch_count;
	rac->_start += rac->_batch_count;

	return readahead_page(rac);
}

#define readahead_for_each(rac, page)				\
	for (page = readahead_page(rac); page != NULL;		\
	     page = readahead_page(rac))

>  /* The number of pages in this readahead block */
>  static inline unsigned int readahead_count(struct readahead_control *rac)
>  {
> diff --git a/mm/readahead.c b/mm/readahead.c
> index bdc5759000d3..9e430daae42f 100644
> --- a/mm/readahead.c
> +++ b/mm/readahead.c
> @@ -113,12 +113,11 @@ int read_cache_pages(struct address_space *mapping, struct list_head *pages,
>
>  EXPORT_SYMBOL(read_cache_pages);
>
> -static void read_pages(struct readahead_control *rac, struct list_head *pages,
> -		gfp_t gfp)
> +static void read_pages(struct readahead_control *rac, struct list_head *pages)
>  {
>  	const struct address_space_operations *aops = rac->mapping->a_ops;
> +	struct page *page;
>  	struct blk_plug plug;
> -	unsigned page_idx;
>
>  	blk_start_plug(&plug);
>
> @@ -127,19 +126,13 @@ static void read_pages(struct readahead_control *rac, struct list_head *pages,
>  				readahead_count(rac));
>  		/* Clean up the remaining pages */
>  		put_pages_list(pages);
> -		goto out;
> -	}
> -
> -	for (page_idx = 0; page_idx < readahead_count(rac); page_idx++) {
> -		struct page *page = lru_to_page(pages);
> -		list_del(&page->lru);
> -		if (!add_to_page_cache_lru(page, rac->mapping, page->index,
> -				gfp))
> +	} else {
> +		readahead_for_each(rac, page) {
>  			aops->readpage(rac->file, page);
> -		put_page(page);
> +			put_page(page);
> +		}
>  	}
>
> -out:
>  	blk_finish_plug(&plug);
>  }
>
> @@ -159,6 +152,7 @@ void __do_page_cache_readahead(struct address_space *mapping,
>  	unsigned long i;
>  	loff_t isize = i_size_read(inode);
>  	gfp_t gfp_mask = readahead_gfp_mask(mapping);
> +	bool use_list = mapping->a_ops->readpages;

fwiw, "bool have_readpages" seems like a better name (after all, that's
how read_pages() effectively is written: "if you have .readpages,
then..."), but I can see both sides of that bikeshed. :)

>  	struct readahead_control rac = {
>  		.mapping = mapping,
>  		.file = filp,
> @@ -196,8 +190,14 @@ void __do_page_cache_readahead(struct address_space *mapping,
>  		page = __page_cache_alloc(gfp_mask);
>  		if (!page)
>  			break;
> -		page->index = offset;
> -		list_add(&page->lru, &page_pool);
> +		if (use_list) {
> +			page->index = offset;
> +			list_add(&page->lru, &page_pool);
> +		} else if (add_to_page_cache_lru(page, mapping, offset,
> +					gfp_mask) < 0) {

It would be a little safer from a maintenance point of view, to check
for != 0, rather than checking for < 0. Most (all?) existing callers
check that way, and it's good to stay with the pack there.

> +			put_page(page);
> +			goto read;
> +		}
>  		if (i == nr_to_read - lookahead_size)
>  			SetPageReadahead(page);
>  		rac._nr_pages++;
> @@ -205,7 +205,7 @@ void __do_page_cache_readahead(struct address_space *mapping,
>  		continue;
>  read:
>  		if (readahead_count(&rac))
> -			read_pages(&rac, &page_pool, gfp_mask);
> +			read_pages(&rac, &page_pool);
>  		rac._nr_pages = 0;
>  		rac._start = ++offset;
>  	}
> @@ -216,7 +216,7 @@ void __do_page_cache_readahead(struct address_space *mapping,
>  	 * will then handle the error.
>  	 */
>  	if (readahead_count(&rac))
> -		read_pages(&rac, &page_pool, gfp_mask);
> +		read_pages(&rac, &page_pool);
>  	BUG_ON(!list_empty(&page_pool));
>  }
>

thanks,
On Tue, Feb 18, 2020 at 07:42:22AM -0800, Matthew Wilcox wrote:
> On Tue, Feb 18, 2020 at 05:14:59PM +1100, Dave Chinner wrote:
> > On Mon, Feb 17, 2020 at 10:45:52AM -0800, Matthew Wilcox wrote:
> > > From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
> > >
> > > At allocation time, put the pages in the cache unless we're using
> > > ->readpages. Add the readahead_for_each() iterator for the benefit of
> > > the ->readpage fallback. This iterator supports huge pages, even though
> > > none of the filesystems to be converted do yet.
> >
> > This could be better written - took me some time to get my head
> > around it and the code.
> >
> > "When populating the page cache for readahead, mappings that don't
> > use ->readpages need to have their pages added to the page cache
> > before ->readpage is called. Do this insertion earlier so that the
> > pages can be looked up immediately prior to ->readpage calls rather
> > than passing them on a linked list. This early insert functionality
> > is also required by the upcoming ->readahead method that will
> > replace ->readpages.
> >
> > Optimise and simplify the readpage loop by adding a
> > readahead_for_each() iterator to provide the pages we need to read.
> > This iterator also supports huge pages, even though none of the
> > filesystems have been converted to use them yet."
>
> Thanks, I'll use that.
>
> > > +static inline struct page *readahead_page(struct readahead_control *rac)
> > > +{
> > > +	struct page *page;
> > > +
> > > +	if (!rac->_nr_pages)
> > > +		return NULL;
> >
> > Hmmmm.
> >
> > > +
> > > +	page = xa_load(&rac->mapping->i_pages, rac->_start);
> > > +	VM_BUG_ON_PAGE(!PageLocked(page), page);
> > > +	rac->_batch_count = hpage_nr_pages(page);
> >
> > So we could have rac->_nr_pages = 2, and then we get an order 2
> > large page returned, and so rac->_batch_count = 4.
>
> Well, no, we couldn't.  rac->_nr_pages is incremented by 4 when we add
> an order-2 page to the readahead.

I don't see any code that does that. :)

i.e. we aren't actually putting high order pages into the page cache
here (page_alloc() allocates order-0 pages), so there's nothing in
the patch that tells me how rac->_nr_pages behaves when allocating
large pages...

IOWs, we have an undocumented assumption in the implementation...

> I can put a
>
> 	BUG_ON(rac->_batch_count > rac->_nr_pages)
>
> in here to be sure to catch any logic error like that.

Definitely necessary given that we don't insert large pages for
readahead yet. A comment explaining the assumptions that the code
makes for large pages is probably in order, too.

> > > -		page->index = offset;
> > > -		list_add(&page->lru, &page_pool);
> > > +		if (use_list) {
> > > +			page->index = offset;
> > > +			list_add(&page->lru, &page_pool);
> > > +		} else if (add_to_page_cache_lru(page, mapping, offset,
> > > +					gfp_mask) < 0) {
> > > +			put_page(page);
> > > +			goto read;
> > > +		}
> >
> > Ok, so that's why you put read code at the end of the loop. To turn
> > the code into spaghetti :/
> >
> > How much does this simplify down when we get rid of ->readpages and
> > can restructure the loop? This really seems like you're trying to
> > flatten two nested loops into one by the use of goto....
>
> I see it as having two failure cases in this loop.  One for "page is
> already present" (which already existed) and one for "allocated a page,
> but failed to add it to the page cache" (which used to be done later).
> I didn't want to duplicate the "call read_pages()" code.  So I
> reshuffled the code rather than add a nested loop.  I don't think the
> nested loop is easier to read (we'll be at 5 levels of indentation for
> some statements).  Could do it this way ...

Can we move the update of @rac inside read_pages()? The next
start offset^Windex we start at is rac._start + rac._nr_pages,
right? So read_pages() could do:

{
	if (readahead_count(rac)) {
		/* do readahead */
	}

	/* advance the readahead cursor */
	rac->_start += rac->_nr_pages;
	rac->_nr_pages = 0;
}

and then we only need to call read_pages() in these cases, and the
requirement to avoid duplicating code goes away...

Cheers,

Dave.
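[ Fleshed out, Dave's suggestion would make read_pages() look something
like the sketch below - his pseudocode combined with the body from the
patch, not code from the thread. Note that the readahead_for_each()
branch has already consumed _nr_pages by the time its loop ends, so the
final advance is a no-op there; only the ->readpages path needs it:

static void read_pages(struct readahead_control *rac,
		struct list_head *pages)
{
	const struct address_space_operations *aops = rac->mapping->a_ops;
	struct page *page;
	struct blk_plug plug;

	if (readahead_count(rac)) {
		blk_start_plug(&plug);
		if (aops->readpages) {
			aops->readpages(rac->file, rac->mapping, pages,
					readahead_count(rac));
			/* Clean up the remaining pages */
			put_pages_list(pages);
		} else {
			readahead_for_each(rac, page) {
				aops->readpage(rac->file, page);
				put_page(page);
			}
		}
		blk_finish_plug(&plug);
	}

	/* Advance the cursor past everything just submitted. */
	rac->_start += rac->_nr_pages;
	rac->_nr_pages = 0;
}

The callers could then drop their "rac._nr_pages = 0; rac._start =
++offset;" bookkeeping, keeping only a one-page step past the page
that ended the batch. ]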
On Tue, Feb 18, 2020 at 04:01:43PM -0800, John Hubbard wrote:
> How about this instead? It uses the "for" loop fully and more naturally,
> and is easier to read. And it does the same thing:
>
> static inline struct page *readahead_page(struct readahead_control *rac)
> {
> 	struct page *page;
>
> 	if (!rac->_nr_pages)
> 		return NULL;
>
> 	page = xa_load(&rac->mapping->i_pages, rac->_start);
> 	VM_BUG_ON_PAGE(!PageLocked(page), page);
> 	rac->_batch_count = hpage_nr_pages(page);
>
> 	return page;
> }
>
> static inline struct page *readahead_next(struct readahead_control *rac)
> {
> 	rac->_nr_pages -= rac->_batch_count;
> 	rac->_start += rac->_batch_count;
>
> 	return readahead_page(rac);
> }
>
> #define readahead_for_each(rac, page)				\
> 	for (page = readahead_page(rac); page != NULL;		\
> 	     page = readahead_page(rac))

I'm assuming you mean 'page = readahead_next(rac)' on that second line.

If you keep reading all the way to the penultimate patch, it won't work
for iomap ... at least not in the same way.
On 2/18/20 5:02 PM, Matthew Wilcox wrote:
> On Tue, Feb 18, 2020 at 04:01:43PM -0800, John Hubbard wrote:
>> How about this instead? It uses the "for" loop fully and more naturally,
>> and is easier to read. And it does the same thing:
>>
>> static inline struct page *readahead_page(struct readahead_control *rac)
>> {
>> 	struct page *page;
>>
>> 	if (!rac->_nr_pages)
>> 		return NULL;
>>
>> 	page = xa_load(&rac->mapping->i_pages, rac->_start);
>> 	VM_BUG_ON_PAGE(!PageLocked(page), page);
>> 	rac->_batch_count = hpage_nr_pages(page);
>>
>> 	return page;
>> }
>>
>> static inline struct page *readahead_next(struct readahead_control *rac)
>> {
>> 	rac->_nr_pages -= rac->_batch_count;
>> 	rac->_start += rac->_batch_count;
>>
>> 	return readahead_page(rac);
>> }
>>
>> #define readahead_for_each(rac, page)				\
>> 	for (page = readahead_page(rac); page != NULL;		\
>> 	     page = readahead_page(rac))
>
> I'm assuming you mean 'page = readahead_next(rac)' on that second line.

Yep. :)

>
> If you keep reading all the way to the penultimate patch, it won't work
> for iomap ... at least not in the same way.
>

OK, getting there...

thanks,
On 2/18/20 5:02 PM, Matthew Wilcox wrote:
> On Tue, Feb 18, 2020 at 04:01:43PM -0800, John Hubbard wrote:
>> How about this instead? It uses the "for" loop fully and more naturally,
>> and is easier to read. And it does the same thing:
>>
>> static inline struct page *readahead_page(struct readahead_control *rac)
>> {
>> 	struct page *page;
>>
>> 	if (!rac->_nr_pages)
>> 		return NULL;
>>
>> 	page = xa_load(&rac->mapping->i_pages, rac->_start);
>> 	VM_BUG_ON_PAGE(!PageLocked(page), page);
>> 	rac->_batch_count = hpage_nr_pages(page);
>>
>> 	return page;
>> }
>>
>> static inline struct page *readahead_next(struct readahead_control *rac)
>> {
>> 	rac->_nr_pages -= rac->_batch_count;
>> 	rac->_start += rac->_batch_count;
>>
>> 	return readahead_page(rac);
>> }
>>
>> #define readahead_for_each(rac, page)				\
>> 	for (page = readahead_page(rac); page != NULL;		\
>> 	     page = readahead_page(rac))
>
> I'm assuming you mean 'page = readahead_next(rac)' on that second line.
>
> If you keep reading all the way to the penultimate patch, it won't work
> for iomap ... at least not in the same way.
>

OK, so after an initial look at patch 18's ("iomap: Convert from
readpages to readahead") use of readahead_page() and readahead_next(),
I'm not sure what I'm missing. Seems like it would work...?

thanks,
On Tue, Feb 18, 2020 at 04:01:43PM -0800, John Hubbard wrote:
> How about this instead? It uses the "for" loop fully and more naturally,
> and is easier to read. And it does the same thing:
>
> static inline struct page *readahead_page(struct readahead_control *rac)
> {
> 	struct page *page;
>
> 	if (!rac->_nr_pages)
> 		return NULL;
>
> 	page = xa_load(&rac->mapping->i_pages, rac->_start);
> 	VM_BUG_ON_PAGE(!PageLocked(page), page);
> 	rac->_batch_count = hpage_nr_pages(page);
>
> 	return page;
> }
>
> static inline struct page *readahead_next(struct readahead_control *rac)
> {
> 	rac->_nr_pages -= rac->_batch_count;
> 	rac->_start += rac->_batch_count;
>
> 	return readahead_page(rac);
> }
>
> #define readahead_for_each(rac, page)				\
> 	for (page = readahead_page(rac); page != NULL;		\
> 	     page = readahead_page(rac))

I'll go you one better ... how about we do this instead:

static inline struct page *readahead_page(struct readahead_control *rac)
{
	struct page *page;

	BUG_ON(rac->_batch_count > rac->_nr_pages);
	rac->_nr_pages -= rac->_batch_count;
	rac->_index += rac->_batch_count;
	rac->_batch_count = 0;

	if (!rac->_nr_pages)
		return NULL;

	page = xa_load(&rac->mapping->i_pages, rac->_index);
	VM_BUG_ON_PAGE(!PageLocked(page), page);
	rac->_batch_count = hpage_nr_pages(page);

	return page;
}

#define readahead_for_each(rac, page)				\
	while ((page = readahead_page(rac)))

No more readahead_next() to forget to add to filesystems which don't use
the readahead_for_each() iterator.  Ahem.
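[ The "Ahem" refers to the failure mode this version removes: with the
two-helper API, readahead_page() is a pure lookup, so a filesystem that
open-codes the loop and forgets readahead_next() spins on the same page
forever. An illustrative sketch of that buggy pattern, where
my_fs_read_one() is a hypothetical per-page I/O helper, not a real
function:

	struct page *page;

	while ((page = readahead_page(rac))) {
		my_fs_read_one(page);
		put_page(page);
		/* missing with the old API: readahead_next(rac); */
	}

With the combined helper above, this exact loop shape becomes correct,
because each readahead_page() call retires the previous batch before
looking up the next page. ]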
On Wed, Feb 19, 2020 at 06:41:17AM -0800, Matthew Wilcox wrote:
> #define readahead_for_each(rac, page)				\
> 	while ((page = readahead_page(rac)))
>
> No more readahead_next() to forget to add to filesystems which don't use
> the readahead_for_each() iterator.  Ahem.

And then kill readahead_for_each and open code the above to make it
even more obvious?
On Wed, Feb 19, 2020 at 06:52:46AM -0800, Christoph Hellwig wrote:
> On Wed, Feb 19, 2020 at 06:41:17AM -0800, Matthew Wilcox wrote:
> > #define readahead_for_each(rac, page)				\
> > 	while ((page = readahead_page(rac)))
> >
> > No more readahead_next() to forget to add to filesystems which don't use
> > the readahead_for_each() iterator.  Ahem.
>
> And then kill readahead_for_each and open code the above to make it
> even more obvious?

Makes sense.
On 2/19/20 7:01 AM, Matthew Wilcox wrote:
> On Wed, Feb 19, 2020 at 06:52:46AM -0800, Christoph Hellwig wrote:
>> On Wed, Feb 19, 2020 at 06:41:17AM -0800, Matthew Wilcox wrote:
>>> #define readahead_for_each(rac, page)				\
>>> 	while ((page = readahead_page(rac)))
>>>
>>> No more readahead_next() to forget to add to filesystems which don't use
>>> the readahead_for_each() iterator.  Ahem.

Yes, this looks very clean. And less error-prone, which I definitely
appreciate too. :)

>>
>> And then kill readahead_for_each and open code the above to make it
>> even more obvious?
>
> Makes sense.
>

Great!

thanks,
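[ Open-coded as Christoph suggests, the ->readpage fallback in
read_pages() would end up as a plain while loop - a sketch of where the
thread landed, assuming the single-helper readahead_page() above:

	} else {
		struct page *page;

		while ((page = readahead_page(rac))) {
			aops->readpage(rac->file, page);
			put_page(page);
		}
	}

The loop condition is the whole iteration contract, so there is no
macro left to misuse. ]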
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 982ecda2d4a2..3613154e79e4 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -639,8 +639,32 @@ struct readahead_control {
 /* private: use the readahead_* accessors instead */
 	pgoff_t _start;
 	unsigned int _nr_pages;
+	unsigned int _batch_count;
 };

+static inline struct page *readahead_page(struct readahead_control *rac)
+{
+	struct page *page;
+
+	if (!rac->_nr_pages)
+		return NULL;
+
+	page = xa_load(&rac->mapping->i_pages, rac->_start);
+	VM_BUG_ON_PAGE(!PageLocked(page), page);
+	rac->_batch_count = hpage_nr_pages(page);
+
+	return page;
+}
+
+static inline void readahead_next(struct readahead_control *rac)
+{
+	rac->_nr_pages -= rac->_batch_count;
+	rac->_start += rac->_batch_count;
+}
+
+#define readahead_for_each(rac, page)					\
+	for (; (page = readahead_page(rac)); readahead_next(rac))
+
 /* The number of pages in this readahead block */
 static inline unsigned int readahead_count(struct readahead_control *rac)
 {
diff --git a/mm/readahead.c b/mm/readahead.c
index bdc5759000d3..9e430daae42f 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -113,12 +113,11 @@ int read_cache_pages(struct address_space *mapping, struct list_head *pages,

 EXPORT_SYMBOL(read_cache_pages);

-static void read_pages(struct readahead_control *rac, struct list_head *pages,
-		gfp_t gfp)
+static void read_pages(struct readahead_control *rac, struct list_head *pages)
 {
 	const struct address_space_operations *aops = rac->mapping->a_ops;
+	struct page *page;
 	struct blk_plug plug;
-	unsigned page_idx;

 	blk_start_plug(&plug);

@@ -127,19 +126,13 @@ static void read_pages(struct readahead_control *rac, struct list_head *pages,
 				readahead_count(rac));
 		/* Clean up the remaining pages */
 		put_pages_list(pages);
-		goto out;
-	}
-
-	for (page_idx = 0; page_idx < readahead_count(rac); page_idx++) {
-		struct page *page = lru_to_page(pages);
-		list_del(&page->lru);
-		if (!add_to_page_cache_lru(page, rac->mapping, page->index,
-				gfp))
+	} else {
+		readahead_for_each(rac, page) {
 			aops->readpage(rac->file, page);
-		put_page(page);
+			put_page(page);
+		}
 	}

-out:
 	blk_finish_plug(&plug);
 }

@@ -159,6 +152,7 @@ void __do_page_cache_readahead(struct address_space *mapping,
 	unsigned long i;
 	loff_t isize = i_size_read(inode);
 	gfp_t gfp_mask = readahead_gfp_mask(mapping);
+	bool use_list = mapping->a_ops->readpages;
 	struct readahead_control rac = {
 		.mapping = mapping,
 		.file = filp,
@@ -196,8 +190,14 @@ void __do_page_cache_readahead(struct address_space *mapping,
 		page = __page_cache_alloc(gfp_mask);
 		if (!page)
 			break;
-		page->index = offset;
-		list_add(&page->lru, &page_pool);
+		if (use_list) {
+			page->index = offset;
+			list_add(&page->lru, &page_pool);
+		} else if (add_to_page_cache_lru(page, mapping, offset,
+					gfp_mask) < 0) {
+			put_page(page);
+			goto read;
+		}
 		if (i == nr_to_read - lookahead_size)
 			SetPageReadahead(page);
 		rac._nr_pages++;
@@ -205,7 +205,7 @@ void __do_page_cache_readahead(struct address_space *mapping,
 		continue;
 read:
 		if (readahead_count(&rac))
-			read_pages(&rac, &page_pool, gfp_mask);
+			read_pages(&rac, &page_pool);
 		rac._nr_pages = 0;
 		rac._start = ++offset;
 	}
@@ -216,7 +216,7 @@ void __do_page_cache_readahead(struct address_space *mapping,
 	 * will then handle the error.
 	 */
 	if (readahead_count(&rac))
-		read_pages(&rac, &page_pool, gfp_mask);
+		read_pages(&rac, &page_pool);
 	BUG_ON(!list_empty(&page_pool));
 }
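[ One loose end worth noting against the patch above: as the thread
discusses, nothing here shows how _nr_pages is maintained once large
pages are inserted. The invariant Matthew states (and the later BUG_ON
enforces) is that the window is accounted in base pages. A hypothetical
insertion path would look like this - no current caller inserts large
pages, so this is illustrative only:

	/*
	 * _nr_pages counts base pages: an order-2 page contributes 4,
	 * so readahead_page()'s _batch_count never exceeds _nr_pages.
	 * add_to_page_cache_lru() returns 0 on success.
	 */
	if (!add_to_page_cache_lru(page, mapping, offset, gfp_mask)) {
		rac._nr_pages += hpage_nr_pages(page);
		offset += hpage_nr_pages(page);
	}
]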