Message ID | 20210224084807.2179942-1-yuzhao@google.com
---|---
State | New, archived
Series | mm: test page->flags directly in page_lru()

On Wed, 24 Feb 2021 01:48:07 -0700 Yu Zhao <yuzhao@google.com> wrote:

> Currently page_lru() uses Page{Active,Unevictable} to determine which
> lru list a page belongs to. Page{Active,Unevictable} contain
> compound_head() and therefore page_lru() essentially tests
> PG_{active,unevictable} against compound_head(page)->flags. Once an
> lru list is determined, page->lru, rather than
> compound_head(page)->lru, will be added to or deleted from it.
>
> Though not bug, having compound_head() in page_lru() increases the
> size of vmlinux by O(KB) because page_lru() is inlined many places.
> And removing compound_head() entirely from Page{Active,Unevictable}
> may not be the best option (for the moment) either because there
> may be other cases that need compound_head(). This patch makes
> page_lru() and __clear_page_lru_flags(), which are used immediately
> before and after operations on page->lru, test
> PG_{active,unevictable} directly against page->flags instead.

Oh geeze.

> --- a/include/linux/mm_inline.h
> +++ b/include/linux/mm_inline.h
> @@ -46,14 +46,12 @@ static __always_inline void __clear_page_lru_flags(struct page *page)
>  {
>  	VM_BUG_ON_PAGE(!PageLRU(page), page);
>
> -	__ClearPageLRU(page);
> -
>  	/* this shouldn't happen, so leave the flags to bad_page() */
> -	if (PageActive(page) && PageUnevictable(page))
> +	if ((page->flags & (BIT(PG_active) | BIT(PG_unevictable))) ==
> +	    (BIT(PG_active) | BIT(PG_unevictable)))
>  		return;

This isn't very nice.  At the very least we should have (documented!)
helper functions for this:

/* comment goes here */
static inline bool RawPageActive(struct page *page)
{
	...
}


However.

Here's what the preprocessor produces for an allmodconfig version of
PageActive():

static inline __attribute__((__gnu_inline__)) __attribute__((__unused__)) __attribute__((no_instrument_function)) __attribute__((__always_inline__)) int PageActive(struct page *page)
{
return test_bit(PG_active, &({ do { if (__builtin_expect(!!(PagePoisoned(compound_head(page))), 0)) { dump_page(compound_head(page), "VM_BUG_ON_PAGE(" "PagePoisoned(compound_head(page))"")"); do { ({ asm volatile("%c0: nop\n\t" ".pushsection .discard.instr_begin\n\t" ".long %c0b - .\n\t" ".popsection\n\t" : : "i" (373)); }); do { asm volatile("1:\t" ".byte 0x0f, 0x0b" "\n" ".pushsection __bug_table,\"aw\"\n" "2:\t" ".long " "1b" " - 2b" "\t# bug_entry::bug_addr\n" "\t" ".long " "%c0" " - 2b" "\t# bug_entry::file\n" "\t.word %c1" "\t# bug_entry::line\n" "\t.word %c2" "\t# bug_entry::flags\n" "\t.org 2b+%c3\n" ".popsection" : : "i" ("./include/linux/page-flags.h"), "i" (338), "i" (0), "i" (sizeof(struct bug_entry))); } while (0); do { ({ asm volatile("%c0:\n\t" ".pushsection .discard.unreachable\n\t" ".long %c0b - .\n\t" ".popsection\n\t" : : "i" (374)); }); asm volatile(""); __builtin_unreachable(); } while (0); } while (0); } } while (0); compound_head(page); })->flags);

}

That's all to test a single bit!

Four calls to compound_head().

Compiling this:

int wibble(struct page *page)
{
	return PageActive(page);
}

to the below assembly output (allmodconfig build) and it appears that
the compiler did not CSE these calls.  Perhaps it would be beneficial
to give it a bit of help.

This is all nuts.  How much of this inlining is really justifiable?  Do
we know we wouldn't get a better kernel if we did

	mv mm-inline.h mm-not-inline-any-more.c

?

Methinks that mm-inline.c needs some serious work...
	.type	wibble, @function
wibble:
1:	call	__fentry__
	.section __mcount_loc, "a",@progbits
	.quad 1b
	.previous
	pushq	%r12	#
	pushq	%rbp	#
	pushq	%rbx	#
# mm/swap.c:1156: {
	movq	%rdi, %rbx	# page, page
	movq	%rbx, %rbp	# page, _14
# ./include/linux/page-flags.h:184: 	unsigned long head = READ_ONCE(page->compound_head);
	call	__sanitizer_cov_trace_pc	#
	movq	8(%rbx), %r12	# page_2(D)->D.14210.D.14188.compound_head, _8
# ./include/linux/page-flags.h:186: 	if (unlikely(head & 1))
	testb	$1, %r12b	#, _8
	je	.L2945	#,
# ./include/linux/page-flags.h:187: 		return (struct page *) (head - 1);
	call	__sanitizer_cov_trace_pc	#
	leaq	-1(%r12), %rbp	#, _14
	jmp	.L2945	#
.L2945:
	call	__sanitizer_cov_trace_pc	#
# ./include/linux/page-flags.h:338: PAGEFLAG(Active, active, PF_HEAD) __CLEARPAGEFLAG(Active, active, PF_HEAD)
	cmpq	$-1, 0(%rbp)	#, MEM[(long unsigned int *)_15]
	jne	.L2946	#,
# ./include/linux/page-flags.h:184: 	unsigned long head = READ_ONCE(page->compound_head);
	call	__sanitizer_cov_trace_pc	#
	movq	8(%rbx), %rbp	#, _16
# ./include/linux/page-flags.h:186: 	if (unlikely(head & 1))
	testb	$1, %bpl	#, _16
	je	.L2947	#,
# ./include/linux/page-flags.h:187: 		return (struct page *) (head - 1);
	leaq	-1(%rbp), %rbx	#, page
	call	__sanitizer_cov_trace_pc	#
.L2947:
# ./include/linux/page-flags.h:338: PAGEFLAG(Active, active, PF_HEAD) __CLEARPAGEFLAG(Active, active, PF_HEAD)
	call	__sanitizer_cov_trace_pc	#
	movq	$.LC20, %rsi	#,
	movq	%rbx, %rdi	# page,
	call	dump_page	#
#APP
# 338 "./include/linux/page-flags.h" 1
373:	nop	#
	.pushsection .discard.instr_begin
	.long 373b - .	#
	.popsection

# 0 "" 2
# 338 "./include/linux/page-flags.h" 1
1:	.byte 0x0f, 0x0b
	.pushsection __bug_table,"aw"
2:	.long 1b - 2b	# bug_entry::bug_addr
	.long .LC3 - 2b	# bug_entry::file	#
	.word 338	# bug_entry::line	#
	.word 0	# bug_entry::flags	#
	.org 2b+12	#
	.popsection
# 0 "" 2
# 338 "./include/linux/page-flags.h" 1
374:		#
	.pushsection .discard.unreachable
	.long 374b - .	#
	.popsection

# 0 "" 2
#NO_APP
.L2946:
# ./include/linux/page-flags.h:184: 	unsigned long head = READ_ONCE(page->compound_head);
	call	__sanitizer_cov_trace_pc	#
	movq	8(%rbx), %rbp	#, _28
# ./include/linux/page-flags.h:186: 	if (unlikely(head & 1))
	testb	$1, %bpl	#, _28
	je	.L2948	#,
# ./include/linux/page-flags.h:187: 		return (struct page *) (head - 1);
	leaq	-1(%rbp), %rbx	#, page
	call	__sanitizer_cov_trace_pc	#
.L2948:
# ./arch/x86/include/asm/bitops.h:207: 		(addr[nr >> _BITOPS_LONG_SHIFT])) != 0;
	call	__sanitizer_cov_trace_pc	#
	movq	(%rbx), %rax	# MEM[(const long unsigned int *)_35], _24
# mm/swap.c:1158: }
	popq	%rbx	#
	popq	%rbp	#
# ./arch/x86/include/asm/bitops.h:207: 		(addr[nr >> _BITOPS_LONG_SHIFT])) != 0;
	shrq	$5, %rax	#, tmp107
# ./include/linux/page-flags.h:338: PAGEFLAG(Active, active, PF_HEAD) __CLEARPAGEFLAG(Active, active, PF_HEAD)
	andl	$1, %eax	#, tmp106
# mm/swap.c:1158: }
	popq	%r12	#
	ret

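For reference, a documented helper along the lines Andrew sketches above could look roughly like this; RawPageActive is his placeholder name, not an existing kernel helper, and the comment spells out the precondition that makes skipping compound_head() safe:

/*
 * Test PG_active directly in page->flags, without the compound_head()
 * indirection that PageActive() performs.  Only valid when the caller
 * knows @page cannot be a tail page, e.g. a page that is (or is about
 * to be) linked on an LRU list.
 */
static inline bool RawPageActive(struct page *page)
{
	return test_bit(PG_active, &page->flags);
}
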
On Wed, Feb 24, 2021 at 05:15:58AM -0800, Andrew Morton wrote:
> On Wed, 24 Feb 2021 01:48:07 -0700 Yu Zhao <yuzhao@google.com> wrote:
>
> > Currently page_lru() uses Page{Active,Unevictable} to determine which
> > lru list a page belongs to. Page{Active,Unevictable} contain
> > compound_head() and therefore page_lru() essentially tests
> > PG_{active,unevictable} against compound_head(page)->flags. Once an
> > lru list is determined, page->lru, rather than
> > compound_head(page)->lru, will be added to or deleted from it.
> >
> > Though not bug, having compound_head() in page_lru() increases the
> > size of vmlinux by O(KB) because page_lru() is inlined many places.
> > And removing compound_head() entirely from Page{Active,Unevictable}
> > may not be the best option (for the moment) either because there
> > may be other cases that need compound_head(). This patch makes
> > page_lru() and __clear_page_lru_flags(), which are used immediately
> > before and after operations on page->lru, test
> > PG_{active,unevictable} directly against page->flags instead.
>
> Oh geeze.
>
> > --- a/include/linux/mm_inline.h
> > +++ b/include/linux/mm_inline.h
> > @@ -46,14 +46,12 @@ static __always_inline void __clear_page_lru_flags(struct page *page)
> >  {
> >  	VM_BUG_ON_PAGE(!PageLRU(page), page);
> >
> > -	__ClearPageLRU(page);
> > -
> >  	/* this shouldn't happen, so leave the flags to bad_page() */
> > -	if (PageActive(page) && PageUnevictable(page))
> > +	if ((page->flags & (BIT(PG_active) | BIT(PG_unevictable))) ==
> > +	    (BIT(PG_active) | BIT(PG_unevictable)))
> >  		return;
>
> This isn't very nice.  At the very least we should have (documented!)
> helper functions for this:

You are right. Now when I look at this, I s/dislike/hate/ it.

> /* comment goes here */
> static inline bool RawPageActive(struct page *page)
> {
> 	...
> }
>
>
> However.
>
> Here's what the preprocessor produces for an allmodconfig version of
> PageActive():
>
> static inline __attribute__((__gnu_inline__)) __attribute__((__unused__)) __attribute__((no_instrument_function)) __attribute__((__always_inline__)) int PageActive(struct page *page)
> {
> return test_bit(PG_active, &({ do { if (__builtin_expect(!!(PagePoisoned(compound_head(page))), 0)) { dump_page(compound_head(page), "VM_BUG_ON_PAGE(" "PagePoisoned(compound_head(page))"")"); do { ({ asm volatile("%c0: nop\n\t" ".pushsection .discard.instr_begin\n\t" ".long %c0b - .\n\t" ".popsection\n\t" : : "i" (373)); }); do { asm volatile("1:\t" ".byte 0x0f, 0x0b" "\n" ".pushsection __bug_table,\"aw\"\n" "2:\t" ".long " "1b" " - 2b" "\t# bug_entry::bug_addr\n" "\t" ".long " "%c0" " - 2b" "\t# bug_entry::file\n" "\t.word %c1" "\t# bug_entry::line\n" "\t.word %c2" "\t# bug_entry::flags\n" "\t.org 2b+%c3\n" ".popsection" : : "i" ("./include/linux/page-flags.h"), "i" (338), "i" (0), "i" (sizeof(struct bug_entry))); } while (0); do { ({ asm volatile("%c0:\n\t" ".pushsection .discard.unreachable\n\t" ".long %c0b - .\n\t" ".popsection\n\t" : : "i" (374)); }); asm volatile(""); __builtin_unreachable(); } while (0); } while (0); } } while (0); compound_head(page); })->flags);
>
> }
>
> That's all to test a single bit!

I hear you. Let me spend a couple of days and focus on PG_{lru,active,
unevictable,swapbacked} first. They are mostly used with lru-related
operations and therefore can be switched to a compound_head()-free
policy easily.

My estimate is we could save ~8KB by doing so :) Weaning off
compound_head() completely is a larger commitment neither I nor Alex
are willing to make at the moment -- I did suggest this to him last
night when I asked him to help test build with GCC, which is their
default compiler (we've switched to Clang).

Another good point he has raised is they did see a slowdown on ARM64
after compound_head() was first introduced. My point is there may be
measurable performance benefit too if we could get rid of those
excessive calls to compound_head(). And I'd be happy to work with
somebody if they are interested in doing this. Fair?

> Four calls to compound_head().
>
> Compiling this:
>
> int wibble(struct page *page)
> {
> 	return PageActive(page);
> }
>
> to the below assembly output (allmodconfig build) and it appears that
> the compiler did not CSE these calls.  Perhaps it would be beneficial
> to give it a bit of help.

Another interesting thing I've noticed is the following change from
patch 10 also makes vmlinux a couple of hundred bytes larger with my
GCC 4.9.x.

-unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru, int zone_idx)
+static unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru,
+				     int zone_idx)

> This is all nuts.  How much of this inlining is really justifiable?  Do
> we know we wouldn't get a better kernel if we did
>
> 	mv mm-inline.h mm-not-inline-any-more.c
>
> ?
>
> Methinks that mm-inline.c needs some serious work...

Agreed. I'll send another series of patches on top of the lru cleanup
series this week.

On Wed, Feb 24, 2021 at 05:15:58AM -0800, Andrew Morton wrote:
> Here's what the preprocessor produces for an allmodconfig version of
> PageActive():
>
> static inline __attribute__((__gnu_inline__)) __attribute__((__unused__)) __attribute__((no_instrument_function)) __attribute__((__always_inline__)) int PageActive(struct page *page)
> {
> return test_bit(PG_active, &({ do { if (__builtin_expect(!!(PagePoisoned(compound_head(page))), 0)) { dump_page(compound_head(page), "VM_BUG_ON_PAGE(" "PagePoisoned(compound_head(page))"")"); do { ({ asm volatile("%c0: nop\n\t" ".pushsection .discard.instr_begin\n\t" ".long %c0b - .\n\t" ".popsection\n\t" : : "i" (373)); }); do { asm volatile("1:\t" ".byte 0x0f, 0x0b" "\n" ".pushsection __bug_table,\"aw\"\n" "2:\t" ".long " "1b" " - 2b" "\t# bug_entry::bug_addr\n" "\t" ".long " "%c0" " - 2b" "\t# bug_entry::file\n" "\t.word %c1" "\t# bug_entry::line\n" "\t.word %c2" "\t# bug_entry::flags\n" "\t.org 2b+%c3\n" ".popsection" : : "i" ("./include/linux/page-flags.h"), "i" (338), "i" (0), "i" (sizeof(struct bug_entry))); } while (0); do { ({ asm volatile("%c0:\n\t" ".pushsection .discard.unreachable\n\t" ".long %c0b - .\n\t" ".popsection\n\t" : : "i" (374)); }); asm volatile(""); __builtin_unreachable(); } while (0); } while (0); } } while (0); compound_head(page); })->flags);
>
> }
>
> That's all to test a single bit!
>
> Four calls to compound_head().

If only somebody were working on a patch series to get rid of
all those calls to compound_head()!  Some reviews on
https://lore.kernel.org/linux-mm/20210128070404.1922318-2-willy@infradead.org/
would be nice.

So, I haven't done page_lru() yet in my folio tree.  What I would do is:

diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 355ea1ee32bd..3895cfe6502b 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -63,22 +63,27 @@ static __always_inline void __clear_page_lru_flags(struct page *page)
  * Returns the LRU list a page should be on, as an index
  * into the array of LRU lists.
  */
-static __always_inline enum lru_list page_lru(struct page *page)
+static __always_inline enum lru_list folio_lru(struct folio *folio)
 {
 	enum lru_list lru;
 
-	VM_BUG_ON_PAGE(PageActive(page) && PageUnevictable(page), page);
+	VM_BUG_ON_PAGE(FolioActive(folio) && FolioUnevictable(folio), folio);
 
-	if (PageUnevictable(page))
+	if (FolioUnevictable(folio))
 		return LRU_UNEVICTABLE;
 
-	lru = page_is_file_lru(page) ? LRU_INACTIVE_FILE : LRU_INACTIVE_ANON;
-	if (PageActive(page))
+	lru = page_is_file_lru(&folio->page) ? LRU_INACTIVE_FILE : LRU_INACTIVE_ANON;
+	if (FolioActive(folio))
 		lru += LRU_ACTIVE;
 
 	return lru;
 }
 
+static __always_inline enum lru_list page_lru(struct page *page)
+{
+	return folio_lru(page_folio(page));
+}
+
 static __always_inline void add_page_to_lru_list(struct page *page,
 					struct lruvec *lruvec)
 {

That would cause compound_head() to be called once instead of four times
(assuming VM_BUG_ON is enabled).  It can be reduced down to zero times
when the callers are converted from being page-based to being folio-based.

There is a further problem with PageFoo() being a READ_ONCE()
of page->flags, so the compiler can't CSE it.  I have ideas in that
direction too; essentially ...

	unsigned long page_flags = PageFlags(page);

	if (PageFlagUnevictable(flags))
		...
	if (PageFlagsActive(flags))
		...

and we can generate the PageFlagsFoo macros with the same machinery in
page-flags.h that generates PageFoo and FolioFoo.  This strikes me as
less critical than the folio work to remove all the unnecessary calls
to compound_head().

> 	movq	%rbx, %rbp	# page, _14
> # ./include/linux/page-flags.h:184: 	unsigned long head = READ_ONCE(page->compound_head);
> 	call	__sanitizer_cov_trace_pc	#

It's a bit unfair to complain about code generation with a
sanitizer-enabled build ...

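A rough sketch of the flag-snapshot idea above, with illustrative names (no such helpers exist in the tree at this point): the flags word is read once, through a single compound_head(), and then tested as an ordinary integer that the compiler is free to keep in a register.

static inline unsigned long PageFlags(struct page *page)
{
	/* One compound_head() and one READ_ONCE() for any number of tests. */
	return READ_ONCE(compound_head(page)->flags);
}

static inline bool PageFlagsActive(unsigned long flags)
{
	return flags & BIT(PG_active);
}

static inline bool PageFlagsUnevictable(unsigned long flags)
{
	return flags & BIT(PG_unevictable);
}
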
On Wed, Feb 24, 2021 at 09:56:39PM +0000, Matthew Wilcox wrote:
> On Wed, Feb 24, 2021 at 05:15:58AM -0800, Andrew Morton wrote:
> > Here's what the preprocessor produces for an allmodconfig version of
> > PageActive():
> >
> > static inline __attribute__((__gnu_inline__)) __attribute__((__unused__)) __attribute__((no_instrument_function)) __attribute__((__always_inline__)) int PageActive(struct page *page)
> > {
> > return test_bit(PG_active, &({ do { if (__builtin_expect(!!(PagePoisoned(compound_head(page))), 0)) { dump_page(compound_head(page), "VM_BUG_ON_PAGE(" "PagePoisoned(compound_head(page))"")"); do { ({ asm volatile("%c0: nop\n\t" ".pushsection .discard.instr_begin\n\t" ".long %c0b - .\n\t" ".popsection\n\t" : : "i" (373)); }); do { asm volatile("1:\t" ".byte 0x0f, 0x0b" "\n" ".pushsection __bug_table,\"aw\"\n" "2:\t" ".long " "1b" " - 2b" "\t# bug_entry::bug_addr\n" "\t" ".long " "%c0" " - 2b" "\t# bug_entry::file\n" "\t.word %c1" "\t# bug_entry::line\n" "\t.word %c2" "\t# bug_entry::flags\n" "\t.org 2b+%c3\n" ".popsection" : : "i" ("./include/linux/page-flags.h"), "i" (338), "i" (0), "i" (sizeof(struct bug_entry))); } while (0); do { ({ asm volatile("%c0:\n\t" ".pushsection .discard.unreachable\n\t" ".long %c0b - .\n\t" ".popsection\n\t" : : "i" (374)); }); asm volatile(""); __builtin_unreachable(); } while (0); } while (0); } } while (0); compound_head(page); })->flags);
> >
> > }
> >
> > That's all to test a single bit!
> >
> > Four calls to compound_head().
>
> If only somebody were working on a patch series to get rid of
> all those calls to compound_head()!  Some reviews on
> https://lore.kernel.org/linux-mm/20210128070404.1922318-2-willy@infradead.org/
> would be nice.

I'm on board with the idea and have done some research in this
direction. We've found that the ideal *anon* page size for Chrome OS
is not 4KB or 2MB, but 32KB. I hope we could leverage the folio to
support flexible anon page size to reduce the number of page faults
(vs 4KB) or internal fragmentation (vs 2MB).

That being said, it seems to me this is a long term plan and right
now we need something smaller. So if you don't mind, I'll just go
ahead and remove compound_head() from Page{LRU,Active,Unevictable,
SwapBacked} first?

On Wed, Feb 24, 2021 at 03:34:16PM -0700, Yu Zhao wrote:
> > If only somebody were working on a patch series to get rid of
> > all those calls to compound_head()!  Some reviews on
> > https://lore.kernel.org/linux-mm/20210128070404.1922318-2-willy@infradead.org/
> > would be nice.
>
> I'm on board with the idea and have done some research in this
> direction. We've found that the ideal *anon* page size for Chrome OS
> is not 4KB or 2MB, but 32KB. I hope we could leverage the folio to
> support flexible anon page size to reduce the number of page faults
> (vs 4KB) or internal fragmentation (vs 2MB).
>
> That being said, it seems to me this is a long term plan and right
> now we need something smaller. So if you don't mind, I'll just go
> ahead and remove compound_head() from Page{LRU,Active,Unevictable,
> SwapBacked} first?

It's really not a big change I'm suggesting here.  You need
https://lore.kernel.org/linux-mm/20210128070404.1922318-2-willy@infradead.org/
https://lore.kernel.org/linux-mm/20210128070404.1922318-5-willy@infradead.org/
https://lore.kernel.org/linux-mm/20210128070404.1922318-8-willy@infradead.org/
and then the patch I sent above to create folio_lru().

Then any changes you want to make to use folios more broadly will
incrementally move us towards your goal of 32kB anon pages.

On Wed, Feb 24, 2021 at 10:48:46PM +0000, Matthew Wilcox wrote:
> On Wed, Feb 24, 2021 at 03:34:16PM -0700, Yu Zhao wrote:
> > > If only somebody were working on a patch series to get rid of
> > > all those calls to compound_head()!  Some reviews on
> > > https://lore.kernel.org/linux-mm/20210128070404.1922318-2-willy@infradead.org/
> > > would be nice.
> >
> > I'm on board with the idea and have done some research in this
> > direction. We've found that the ideal *anon* page size for Chrome OS
> > is not 4KB or 2MB, but 32KB. I hope we could leverage the folio to
> > support flexible anon page size to reduce the number of page faults
> > (vs 4KB) or internal fragmentation (vs 2MB).
> >
> > That being said, it seems to me this is a long term plan and right
> > now we need something smaller. So if you don't mind, I'll just go
> > ahead and remove compound_head() from Page{LRU,Active,Unevictable,
> > SwapBacked} first?
>
> It's really not a big change I'm suggesting here.  You need
> https://lore.kernel.org/linux-mm/20210128070404.1922318-2-willy@infradead.org/
> https://lore.kernel.org/linux-mm/20210128070404.1922318-5-willy@infradead.org/
> https://lore.kernel.org/linux-mm/20210128070404.1922318-8-willy@infradead.org/
> and then the patch I sent above to create folio_lru().
>
> Then any changes you want to make to use folios more broadly will
> incrementally move us towards your goal of 32kB anon pages.

Well, these patches introduce a new concept which I'm on board with.
Assuming everybody else is too, it still seems to me it's overkill
to employ folio just to get rid of unnecessary compound_head()
in page_lru() -- this is not a criticism but a compliment.

Let me work out something *conceptually* smaller first, and if you
think folio is absolutely more suitable even for this specific issue,
I'll go review and test the four patches you listed. Sounds good?

On Wed, Feb 24, 2021 at 04:50:39PM -0700, Yu Zhao wrote:
> On Wed, Feb 24, 2021 at 10:48:46PM +0000, Matthew Wilcox wrote:
> > On Wed, Feb 24, 2021 at 03:34:16PM -0700, Yu Zhao wrote:
> > > > If only somebody were working on a patch series to get rid of
> > > > all those calls to compound_head()!  Some reviews on
> > > > https://lore.kernel.org/linux-mm/20210128070404.1922318-2-willy@infradead.org/
> > > > would be nice.
> > >
> > > I'm on board with the idea and have done some research in this
> > > direction. We've found that the ideal *anon* page size for Chrome OS
> > > is not 4KB or 2MB, but 32KB. I hope we could leverage the folio to
> > > support flexible anon page size to reduce the number of page faults
> > > (vs 4KB) or internal fragmentation (vs 2MB).
> > >
> > > That being said, it seems to me this is a long term plan and right
> > > now we need something smaller. So if you don't mind, I'll just go
> > > ahead and remove compound_head() from Page{LRU,Active,Unevictable,
> > > SwapBacked} first?
> >
> > It's really not a big change I'm suggesting here.  You need
> > https://lore.kernel.org/linux-mm/20210128070404.1922318-2-willy@infradead.org/
> > https://lore.kernel.org/linux-mm/20210128070404.1922318-5-willy@infradead.org/
> > https://lore.kernel.org/linux-mm/20210128070404.1922318-8-willy@infradead.org/
> > and then the patch I sent above to create folio_lru().
> >
> > Then any changes you want to make to use folios more broadly will
> > incrementally move us towards your goal of 32kB anon pages.
>
> Well, these patches introduce a new concept which I'm on board with.

It's not really a new concept ... it's a new type for an existing concept
(a head page).

> Assuming everybody else is too, it still seems to me it's overkill
> to employ folio just to get rid of unnecessary compound_head()
> in page_lru() -- this is not a criticism but a compliment.

It's not overkill, that really is the point of a folio!  If you
think about it, only head pages can be on the LRU list (because the
compound_head is in the union with the lru list_head).  So it
always makes sense to talk about folios on the LRU list.

> Let me work out something *conceptually* smaller first, and if you
> think folio is absolutely more suitable even for this specific issue,
> I'll go review and test the four patches you listed. Sounds good?

Umm.  It seems to me that no matter what you do, it'll be equivalent to
this, only without the type-safety?

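The union Matthew refers to is the one at the top of struct page; a heavily simplified sketch (see include/linux/mm_types.h for the real layout):

struct page {
	unsigned long flags;
	union {
		struct {	/* page cache and anonymous pages */
			struct list_head lru;		/* LRU list linkage */
			struct address_space *mapping;
			pgoff_t index;
			unsigned long private;
		};
		struct {	/* tail pages of a compound page */
			unsigned long compound_head;	/* bit zero is set */
			/* ... */
		};
		/* ... slab, page-table and other users of this space ... */
	};
	/* ... _mapcount, _refcount, ... */
};

Because compound_head shares storage with lru.next and always has bit zero set (list_head pointers are word-aligned), a tail page can never simultaneously be linked into an LRU list; only head pages -- that is, folios -- ever appear there.
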
On Thu, Feb 25, 2021 at 03:55:53AM +0000, Matthew Wilcox wrote:
> On Wed, Feb 24, 2021 at 04:50:39PM -0700, Yu Zhao wrote:
> > On Wed, Feb 24, 2021 at 10:48:46PM +0000, Matthew Wilcox wrote:
> > > On Wed, Feb 24, 2021 at 03:34:16PM -0700, Yu Zhao wrote:
> > > > > If only somebody were working on a patch series to get rid of
> > > > > all those calls to compound_head()!  Some reviews on
> > > > > https://lore.kernel.org/linux-mm/20210128070404.1922318-2-willy@infradead.org/
> > > > > would be nice.
> > > >
> > > > I'm on board with the idea and have done some research in this
> > > > direction. We've found that the ideal *anon* page size for Chrome OS
> > > > is not 4KB or 2MB, but 32KB. I hope we could leverage the folio to
> > > > support flexible anon page size to reduce the number of page faults
> > > > (vs 4KB) or internal fragmentation (vs 2MB).
> > > >
> > > > That being said, it seems to me this is a long term plan and right
> > > > now we need something smaller. So if you don't mind, I'll just go
> > > > ahead and remove compound_head() from Page{LRU,Active,Unevictable,
> > > > SwapBacked} first?
> > >
> > > It's really not a big change I'm suggesting here.  You need
> > > https://lore.kernel.org/linux-mm/20210128070404.1922318-2-willy@infradead.org/
> > > https://lore.kernel.org/linux-mm/20210128070404.1922318-5-willy@infradead.org/
> > > https://lore.kernel.org/linux-mm/20210128070404.1922318-8-willy@infradead.org/
> > > and then the patch I sent above to create folio_lru().
> > >
> > > Then any changes you want to make to use folios more broadly will
> > > incrementally move us towards your goal of 32kB anon pages.
> >
> > Well, these patches introduce a new concept which I'm on board with.
>
> It's not really a new concept ... it's a new type for an existing concept
> (a head page).
>
> > Assuming everybody else is too, it still seems to me it's overkill
> > to employ folio just to get rid of unnecessary compound_head()
> > in page_lru() -- this is not a criticism but a compliment.
>
> It's not overkill, that really is the point of a folio!  If you
> think about it, only head pages can be on the LRU list (because the
> compound_head is in the union with the lru list_head).  So it
> always makes sense to talk about folios on the LRU list.
>
> > Let me work out something *conceptually* smaller first, and if you
> > think folio is absolutely more suitable even for this specific issue,
> > I'll go review and test the four patches you listed. Sounds good?
>
> Umm.  It seems to me that no matter what you do, it'll be equivalent to
> this, only without the type-safety?

I'm thinking about something trivial but still very effective. So far
I've only tested it with PG_{active,unevictable}, and I'm already
seeing a 4KB gain less the 2KB loss from page_lru().

I didn't go with this at the beginning because it's also time-
consuming. I need to go over every single use of
PG_{active,unevictable,swapbacked,lru}.

add/remove: 0/0 grow/shrink: 1/37 up/down: 4/-4129 (-4125)

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 3cec6fbef725..c866c363bb41 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1712,6 +1712,7 @@ static void gather_stats(struct page *page, struct numa_maps *md, int pte_dirty,
 		unsigned long nr_pages)
 {
 	int count = page_mapcount(page);
+	struct page *head = compound_head(page);
 
 	md->pages += nr_pages;
 	if (pte_dirty || PageDirty(page))
@@ -1720,7 +1721,7 @@ static void gather_stats(struct page *page, struct numa_maps *md, int pte_dirty,
 	if (PageSwapCache(page))
 		md->swapcache += nr_pages;
 
-	if (PageActive(page) || PageUnevictable(page))
+	if (PageActive(head) || PageUnevictable(head))
 		md->active += nr_pages;
 
 	if (PageWriteback(page))
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index db914477057b..35b3d272ab4c 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -335,8 +335,8 @@ PAGEFLAG(Dirty, dirty, PF_HEAD) TESTSCFLAG(Dirty, dirty, PF_HEAD)
 	__CLEARPAGEFLAG(Dirty, dirty, PF_HEAD)
 PAGEFLAG(LRU, lru, PF_HEAD) __CLEARPAGEFLAG(LRU, lru, PF_HEAD)
 	TESTCLEARFLAG(LRU, lru, PF_HEAD)
-PAGEFLAG(Active, active, PF_HEAD) __CLEARPAGEFLAG(Active, active, PF_HEAD)
-	TESTCLEARFLAG(Active, active, PF_HEAD)
+PAGEFLAG(Active, active, PF_ONLY_HEAD) __CLEARPAGEFLAG(Active, active, PF_ONLY_HEAD)
+	TESTCLEARFLAG(Active, active, PF_ONLY_HEAD)
 PAGEFLAG(Workingset, workingset, PF_HEAD)
 	TESTCLEARFLAG(Workingset, workingset, PF_HEAD)
 __PAGEFLAG(Slab, slab, PF_NO_TAIL)
@@ -407,9 +407,9 @@ CLEARPAGEFLAG(SwapCache, swapcache, PF_NO_TAIL)
 PAGEFLAG_FALSE(SwapCache)
 #endif
 
-PAGEFLAG(Unevictable, unevictable, PF_HEAD)
-	__CLEARPAGEFLAG(Unevictable, unevictable, PF_HEAD)
-	TESTCLEARFLAG(Unevictable, unevictable, PF_HEAD)
+PAGEFLAG(Unevictable, unevictable, PF_ONLY_HEAD)
+	__CLEARPAGEFLAG(Unevictable, unevictable, PF_ONLY_HEAD)
+	TESTCLEARFLAG(Unevictable, unevictable, PF_ONLY_HEAD)
 
 #ifdef CONFIG_MMU
 PAGEFLAG(Mlocked, mlocked, PF_NO_TAIL)

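For context, the two flag policies being swapped in the hunk above differ roughly as follows in include/linux/page-flags.h (paraphrased; the real macros carry more debug plumbing):

/* PF_HEAD: the flag may be tested on any page; the operation is
 * redirected to the head page via compound_head(). */
#define PF_HEAD(page, enforce)	PF_POISONED_CHECK(compound_head(page))

/* PF_ONLY_HEAD: the caller promises never to pass a tail page, so no
 * compound_head() is emitted; a tail page only trips a debug check. */
#define PF_ONLY_HEAD(page, enforce) ({					\
		VM_BUG_ON_PGFLAGS(PageTail(page), page);		\
		PF_POISONED_CHECK(page); })

Switching PG_active and PG_unevictable to PF_ONLY_HEAD drops the compound_head() call from every PageActive()/PageUnevictable() test, at the cost of turning a misuse on a tail page into a debug-only runtime check -- exactly the trade-off Matthew raises in the next message.
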
On Wed, Feb 24, 2021 at 10:22:38PM -0700, Yu Zhao wrote:
> On Thu, Feb 25, 2021 at 03:55:53AM +0000, Matthew Wilcox wrote:
> > On Wed, Feb 24, 2021 at 04:50:39PM -0700, Yu Zhao wrote:
> > > Let me work out something *conceptually* smaller first, and if you
> > > think folio is absolutely more suitable even for this specific issue,
> > > I'll go review and test the four patches you listed. Sounds good?
> >
> > Umm.  It seems to me that no matter what you do, it'll be equivalent to
> > this, only without the type-safety?
>
> I'm thinking about something trivial but still very effective. So far
> I've only tested it with PG_{active,unevictable}, and I'm already
> seeing a 4KB gain less the 2KB loss from page_lru().
>
> I didn't go with this at the beginning because it's also time-
> consuming. I need to go over every single use of
> PG_{active,unevictable,swapbacked,lru}.

Well, yes.  If you went with the folio, it'd also be typesafe.  What
you've done here makes it a runtime error, and it's only detected if
you enable CONFIG_DEBUG_VM_PGFLAGS, which people don't do, in general.

> +++ b/fs/proc/task_mmu.c
> @@ -1712,6 +1712,7 @@ static void gather_stats(struct page *page, struct numa_maps *md, int pte_dirty,
>  		unsigned long nr_pages)
>  {
>  	int count = page_mapcount(page);
> +	struct page *head = compound_head(page);
>
>  	md->pages += nr_pages;
>  	if (pte_dirty || PageDirty(page))

... if you went full-on folio in this function, you could also make
this FolioDirty, saving another call to compound_head.

> @@ -1720,7 +1721,7 @@ static void gather_stats(struct page *page, struct numa_maps *md, int pte_dirty,
>  	if (PageSwapCache(page))

... ditto ...

>  		md->swapcache += nr_pages;
>
> -	if (PageActive(page) || PageUnevictable(page))
> +	if (PageActive(head) || PageUnevictable(head))
>  		md->active += nr_pages;
>
>  	if (PageWriteback(page))

... ditto...

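Matthew's point about the misuse only being caught at runtime with CONFIG_DEBUG_VM_PGFLAGS comes from how VM_BUG_ON_PGFLAGS is defined in include/linux/mmdebug.h, roughly:

#ifdef CONFIG_DEBUG_VM_PGFLAGS
#define VM_BUG_ON_PGFLAGS(cond, page)	VM_BUG_ON_PAGE(cond, page)
#else
#define VM_BUG_ON_PGFLAGS(cond, page)	BUILD_BUG_ON_INVALID(cond)
#endif

With CONFIG_DEBUG_VM_PGFLAGS disabled, the condition is never evaluated at runtime, so passing a tail page to a PF_ONLY_HEAD flag test would silently read the wrong page's flags rather than being caught.
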
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 355ea1ee32bd..1b8df9e6f63f 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -46,14 +46,12 @@ static __always_inline void __clear_page_lru_flags(struct page *page)
 {
 	VM_BUG_ON_PAGE(!PageLRU(page), page);
 
-	__ClearPageLRU(page);
-
 	/* this shouldn't happen, so leave the flags to bad_page() */
-	if (PageActive(page) && PageUnevictable(page))
+	if ((page->flags & (BIT(PG_active) | BIT(PG_unevictable))) ==
+	    (BIT(PG_active) | BIT(PG_unevictable)))
 		return;
 
-	__ClearPageActive(page);
-	__ClearPageUnevictable(page);
+	page->flags &= ~(BIT(PG_lru) | BIT(PG_active) | BIT(PG_unevictable));
 }
 
 /**
@@ -65,18 +63,13 @@ static __always_inline void __clear_page_lru_flags(struct page *page)
  */
 static __always_inline enum lru_list page_lru(struct page *page)
 {
-	enum lru_list lru;
+	unsigned long flags = READ_ONCE(page->flags);
 
 	VM_BUG_ON_PAGE(PageActive(page) && PageUnevictable(page), page);
 
-	if (PageUnevictable(page))
-		return LRU_UNEVICTABLE;
-
-	lru = page_is_file_lru(page) ? LRU_INACTIVE_FILE : LRU_INACTIVE_ANON;
-	if (PageActive(page))
-		lru += LRU_ACTIVE;
-
-	return lru;
+	/* test page->flags directly to avoid unnecessary compound_head() */
+	return (flags & BIT(PG_unevictable)) ? LRU_UNEVICTABLE :
+	       (LRU_FILE * !(flags & BIT(PG_swapbacked)) + !!(flags & BIT(PG_active)));
 }
 
 static __always_inline void add_page_to_lru_list(struct page *page,

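The arithmetic in the new return statement leans on the layout of enum lru_list and the LRU_* offsets in include/linux/mmzone.h, which at the time looked roughly like this:

#define LRU_BASE	0
#define LRU_ACTIVE	1
#define LRU_FILE	2

enum lru_list {
	LRU_INACTIVE_ANON = LRU_BASE,
	LRU_ACTIVE_ANON = LRU_BASE + LRU_ACTIVE,
	LRU_INACTIVE_FILE = LRU_BASE + LRU_FILE,
	LRU_ACTIVE_FILE = LRU_BASE + LRU_FILE + LRU_ACTIVE,
	LRU_UNEVICTABLE,
	NR_LRU_LISTS
};

So LRU_FILE * !(flags & BIT(PG_swapbacked)) selects the anon or file base (a page without PG_swapbacked is file-backed, matching what page_is_file_lru() tests), and !!(flags & BIT(PG_active)) adds LRU_ACTIVE for active pages.
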
Currently page_lru() uses Page{Active,Unevictable} to determine which
lru list a page belongs to. Page{Active,Unevictable} contain
compound_head() and therefore page_lru() essentially tests
PG_{active,unevictable} against compound_head(page)->flags. Once an
lru list is determined, page->lru, rather than
compound_head(page)->lru, will be added to or deleted from it.

Though not bug, having compound_head() in page_lru() increases the
size of vmlinux by O(KB) because page_lru() is inlined many places.
And removing compound_head() entirely from Page{Active,Unevictable}
may not be the best option (for the moment) either because there
may be other cases that need compound_head(). This patch makes
page_lru() and __clear_page_lru_flags(), which are used immediately
before and after operations on page->lru, test
PG_{active,unevictable} directly against page->flags instead.

scripts/bloat-o-meter results before and after the entire series:

  Clang: add/remove: 0/1 grow/shrink: 7/10 up/down: 191/-1189 (-998)
  GCC:   add/remove: 0/1 grow/shrink: 9/9  up/down: 1010/-783 (227)

Signed-off-by: Yu Zhao <yuzhao@google.com>
---
 include/linux/mm_inline.h | 21 +++++++--------------
 1 file changed, 7 insertions(+), 14 deletions(-)