Message ID | 20200824145511.10500-6-willy@infradead.org (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | THP iomap patches for 5.10 | expand |
On Mon, Aug 24, 2020 at 03:55:06PM +0100, Matthew Wilcox (Oracle) wrote: > Size the uptodate array dynamically to support larger pages in the > page cache. With a 64kB page, we're only saving 8 bytes per page today, > but with a 2MB maximum page size, we'd have to allocate more than 4kB > per page. Add a few debugging assertions. > > Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org> > --- > fs/iomap/buffered-io.c | 14 ++++++++++---- > 1 file changed, 10 insertions(+), 4 deletions(-) > > diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c > index dbf9572dabe9..844e95cacea8 100644 > --- a/fs/iomap/buffered-io.c > +++ b/fs/iomap/buffered-io.c > @@ -22,18 +22,19 @@ > #include "../internal.h" > > /* > - * Structure allocated for each page when block size < PAGE_SIZE to track > + * Structure allocated for each page when block size < page size to track > * sub-page uptodate status and I/O completions. > */ > struct iomap_page { > atomic_t read_count; > atomic_t write_count; > spinlock_t uptodate_lock; > - DECLARE_BITMAP(uptodate, PAGE_SIZE / 512); > + unsigned long uptodate[]; > }; > > static inline struct iomap_page *to_iomap_page(struct page *page) > { > + VM_BUG_ON_PGFLAGS(PageTail(page), page); > if (page_has_private(page)) > return (struct iomap_page *)page_private(page); > return NULL; Just to confirm: this vm bug check is to needed becuse we only attach the iomap_page to the head page of a compound page? Assuming that I've understood the above correctly: Reviewed-by: Dave Chinner <dchinner@redhat.com>
On Tue, Aug 25, 2020 at 09:59:18AM +1000, Dave Chinner wrote: > On Mon, Aug 24, 2020 at 03:55:06PM +0100, Matthew Wilcox (Oracle) wrote: > > static inline struct iomap_page *to_iomap_page(struct page *page) > > { > > + VM_BUG_ON_PGFLAGS(PageTail(page), page); > > if (page_has_private(page)) > > return (struct iomap_page *)page_private(page); > > return NULL; > > Just to confirm: this vm bug check is to needed becuse we only > attach the iomap_page to the head page of a compound page? > > Assuming that I've understood the above correctly: That's correct. If we get a tail page in this path, something's gone wrong somewhere upstream of us, and the stack trace should tell us where. It's PGFLAGS so it's usually compiled out by distro kernels (you need to enable CONFIG_DEBUG_VM_PGFLAGS for it to be active). It was definitely useful in development; probably not so useful for a distro kernel to assert.
On Mon, Aug 24, 2020 at 03:55:06PM +0100, Matthew Wilcox (Oracle) wrote: > Size the uptodate array dynamically to support larger pages in the > page cache. With a 64kB page, we're only saving 8 bytes per page today, > but with a 2MB maximum page size, we'd have to allocate more than 4kB > per page. Add a few debugging assertions. > > Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org> > --- > fs/iomap/buffered-io.c | 14 ++++++++++---- > 1 file changed, 10 insertions(+), 4 deletions(-) > > diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c > index dbf9572dabe9..844e95cacea8 100644 > --- a/fs/iomap/buffered-io.c > +++ b/fs/iomap/buffered-io.c > @@ -22,18 +22,19 @@ > #include "../internal.h" > > /* > - * Structure allocated for each page when block size < PAGE_SIZE to track > + * Structure allocated for each page when block size < page size to track > * sub-page uptodate status and I/O completions. "for each regular page or head page of a huge page"? Or whatever we're calling them nowadays? --D > */ > struct iomap_page { > atomic_t read_count; > atomic_t write_count; > spinlock_t uptodate_lock; > - DECLARE_BITMAP(uptodate, PAGE_SIZE / 512); > + unsigned long uptodate[]; > }; > > static inline struct iomap_page *to_iomap_page(struct page *page) > { > + VM_BUG_ON_PGFLAGS(PageTail(page), page); > if (page_has_private(page)) > return (struct iomap_page *)page_private(page); > return NULL; > @@ -45,11 +46,13 @@ static struct iomap_page * > iomap_page_create(struct inode *inode, struct page *page) > { > struct iomap_page *iop = to_iomap_page(page); > + unsigned int nr_blocks = i_blocks_per_page(inode, page); > > - if (iop || i_blocks_per_page(inode, page) <= 1) > + if (iop || nr_blocks <= 1) > return iop; > > - iop = kzalloc(sizeof(*iop), GFP_NOFS | __GFP_NOFAIL); > + iop = kzalloc(struct_size(iop, uptodate, BITS_TO_LONGS(nr_blocks)), > + GFP_NOFS | __GFP_NOFAIL); > spin_lock_init(&iop->uptodate_lock); > attach_page_private(page, iop); > return iop; > @@ -59,11 +62,14 @@ static void > iomap_page_release(struct page *page) > { > struct iomap_page *iop = detach_page_private(page); > + unsigned int nr_blocks = i_blocks_per_page(page->mapping->host, page); > > if (!iop) > return; > WARN_ON_ONCE(atomic_read(&iop->read_count)); > WARN_ON_ONCE(atomic_read(&iop->write_count)); > + WARN_ON_ONCE(bitmap_full(iop->uptodate, nr_blocks) != > + PageUptodate(page)); > kfree(iop); > } > > -- > 2.28.0 >
On Tue, Aug 25, 2020 at 02:02:03PM -0700, Darrick J. Wong wrote: > > /* > > - * Structure allocated for each page when block size < PAGE_SIZE to track > > + * Structure allocated for each page when block size < page size to track > > * sub-page uptodate status and I/O completions. > > "for each regular page or head page of a huge page"? Or whatever we're > calling them nowadays? Well, that's what I'm calling a "page" ;-) How about "for each page or THP"? The fact that it's stored in the head page is incidental -- it's allocated for the THP.
On Wed, Aug 26, 2020 at 03:26:23AM +0100, Matthew Wilcox wrote: > On Tue, Aug 25, 2020 at 02:02:03PM -0700, Darrick J. Wong wrote: > > > /* > > > - * Structure allocated for each page when block size < PAGE_SIZE to track > > > + * Structure allocated for each page when block size < page size to track > > > * sub-page uptodate status and I/O completions. > > > > "for each regular page or head page of a huge page"? Or whatever we're > > calling them nowadays? > > Well, that's what I'm calling a "page" ;-) > > How about "for each page or THP"? The fact that it's stored in the > head page is incidental -- it's allocated for the THP. Ok. --D
> static inline struct iomap_page *to_iomap_page(struct page *page) > { > + VM_BUG_ON_PGFLAGS(PageTail(page), page); > if (page_has_private(page)) > return (struct iomap_page *)page_private(page); > return NULL; Nit: can you add an empty line after the VM_BUG_ON_PGFLAGS assert to keep the function readable? Maybe also add a comment on the assert, as it isn't totally obvious. Otherwise looks good: Reviewed-by: Christoph Hellwig <hch@lst.de>
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index dbf9572dabe9..844e95cacea8 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -22,18 +22,19 @@ #include "../internal.h" /* - * Structure allocated for each page when block size < PAGE_SIZE to track + * Structure allocated for each page when block size < page size to track * sub-page uptodate status and I/O completions. */ struct iomap_page { atomic_t read_count; atomic_t write_count; spinlock_t uptodate_lock; - DECLARE_BITMAP(uptodate, PAGE_SIZE / 512); + unsigned long uptodate[]; }; static inline struct iomap_page *to_iomap_page(struct page *page) { + VM_BUG_ON_PGFLAGS(PageTail(page), page); if (page_has_private(page)) return (struct iomap_page *)page_private(page); return NULL; @@ -45,11 +46,13 @@ static struct iomap_page * iomap_page_create(struct inode *inode, struct page *page) { struct iomap_page *iop = to_iomap_page(page); + unsigned int nr_blocks = i_blocks_per_page(inode, page); - if (iop || i_blocks_per_page(inode, page) <= 1) + if (iop || nr_blocks <= 1) return iop; - iop = kzalloc(sizeof(*iop), GFP_NOFS | __GFP_NOFAIL); + iop = kzalloc(struct_size(iop, uptodate, BITS_TO_LONGS(nr_blocks)), + GFP_NOFS | __GFP_NOFAIL); spin_lock_init(&iop->uptodate_lock); attach_page_private(page, iop); return iop; @@ -59,11 +62,14 @@ static void iomap_page_release(struct page *page) { struct iomap_page *iop = detach_page_private(page); + unsigned int nr_blocks = i_blocks_per_page(page->mapping->host, page); if (!iop) return; WARN_ON_ONCE(atomic_read(&iop->read_count)); WARN_ON_ONCE(atomic_read(&iop->write_count)); + WARN_ON_ONCE(bitmap_full(iop->uptodate, nr_blocks) != + PageUptodate(page)); kfree(iop); }
Size the uptodate array dynamically to support larger pages in the page cache. With a 64kB page, we're only saving 8 bytes per page today, but with a 2MB maximum page size, we'd have to allocate more than 4kB per page. Add a few debugging assertions. Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org> --- fs/iomap/buffered-io.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-)