Message ID | 20210709102855.55058-2-yanfei.xu@windriver.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | [1/2] mm/page_alloc: correct return value when failing at preparing | expand |
On Fri, Jul 09, 2021 at 06:28:55PM +0800, Yanfei Xu wrote: > While the nr_populated is non-zero, however the nr_account might be > zero if allocating fails. In this case, not to count event can save > some cycles. > The much more likely path is that nr_account is positive so we avoid a branch in the common case. > And this commit extract the check of "page_array" from a while > statement to avoid unnecessary checks for it. > I'm surprised the compiler does not catch that page_array is invariant for the loop. Did you check if gcc generates different code is page_array is explicitly checked once instead of putting it in the loop?
On 7/9/21 8:26 PM, Mel Gorman wrote: > [Please note: This e-mail is from an EXTERNAL e-mail address] > > On Fri, Jul 09, 2021 at 06:28:55PM +0800, Yanfei Xu wrote: >> While the nr_populated is non-zero, however the nr_account might be >> zero if allocating fails. In this case, not to count event can save >> some cycles. >> > > The much more likely path is that nr_account is positive so we avoid a > branch in the common case. > Got it. >> And this commit extract the check of "page_array" from a while >> statement to avoid unnecessary checks for it. >> > > I'm surprised the compiler does not catch that page_array is invariant > for the loop. Did you check if gcc generates different code is page_array > is explicitly checked once instead of putting it in the loop? > Hm.. In fact, the page_array always doesn't appear in the loop due to the compiler's optimization. And I just confirmed the assembly. not apply my patch: ffffffff81267100 <__alloc_pages_bulk>: ffffffff81267100: e8 0b 73 df ff callq ffffffff8105e410 <__fentry__> ffffffff81267105: 55 push %rbp ffffffff81267106: 48 89 e5 mov %rsp,%rbp ffffffff81267109: 41 57 push %r15 ffffffff8126710b: 41 56 push %r14 ffffffff8126710d: 41 55 push %r13 ffffffff8126710f: 41 54 push %r12 ffffffff81267111: 53 push %rbx ffffffff81267112: 48 83 ec 58 sub $0x58,%rsp ffffffff81267116: 85 c9 test %ecx,%ecx ffffffff81267118: 89 7d c4 mov %edi,-0x3c(%rbp) ffffffff8126711b: 89 75 9c mov %esi,-0x64(%rbp) ffffffff8126711e: 48 89 55 a0 mov %rdx,-0x60(%rbp) ffffffff81267122: 89 4d d4 mov %ecx,-0x2c(%rbp) ffffffff81267125: 4c 89 45 a8 mov %r8,-0x58(%rbp) ffffffff81267129: 4c 89 4d c8 mov %r9,-0x38(%rbp) ffffffff8126712d: 0f 8e 7c 05 00 00 jle ffffffff812676af <__alloc_pages_bulk+0x5af> ffffffff81267133: 4d 85 c9 test %r9,%r9 ffffffff81267136: 0f 84 84 05 00 00 je ffffffff812676c0 <__alloc_pages_bulk+0x5c0> ffffffff8126713c: 49 83 39 00 cmpq $0x0,(%r9) ffffffff81267140: 0f 84 7a 05 00 00 je ffffffff812676c0 <__alloc_pages_bulk+0x5c0> ffffffff81267146: 49 8d 41 08 lea 0x8(%r9),%rax ffffffff8126714a: 89 ca mov %ecx,%edx ffffffff8126714c: 45 31 e4 xor %r12d,%r12d ffffffff8126714f: eb 0b jmp ffffffff8126715c <__alloc_pages_bulk+0x5c> ffffffff81267151: 48 83 c0 08 add $0x8,%rax ffffffff81267155: 48 83 78 f8 00 cmpq $0x0,-0x8(%rax) ffffffff8126715a: 74 1c je ffffffff81267178 <__alloc_pages_bulk+0x78> ffffffff8126715c: 41 83 c4 01 add $0x1,%r12d ffffffff81267160: 44 39 e2 cmp %r12d,%edx ffffffff81267163: 75 ec jne ffffffff81267151 <__alloc_pages_bulk+0x51> ffffffff81267165: 48 63 45 d4 movslq -0x2c(%rbp),%rax ffffffff81267169: 48 83 c4 58 add $0x58,%rsp ffffffff8126716d: 5b pop %rbx ffffffff8126716e: 41 5c pop %r12 ffffffff81267170: 41 5d pop %r13 ffffffff81267172: 41 5e pop %r14 ffffffff81267174: 41 5f pop %r15 ffffffff81267176: 5d pop %rbp ffffffff81267177: c3 retq ffffffff81267178: 8b 45 d4 mov -0x2c(%rbp),%eax ffffffff8126717b: 44 29 e0 sub %r12d,%eax ffffffff8126717e: 83 f8 01 cmp $0x1,%eax ffffffff81267181: 0f 84 47 04 00 00 je ffffffff812675ce <__alloc_pages_bulk+0x4ce> ffffffff81267187: 8b 0d 53 22 a4 01 mov 0x1a42253(%rip),%ecx # ffffffff82ca93e0 <page_group_by_mobility_disabled> ffffffff8126718d: 48 63 45 9c movslq -0x64(%rbp),%rax ffffffff81267191: 44 8b 7d c4 mov -0x3c(%rbp),%r15d ffffffff81267195: 44 23 3d 58 22 a4 01 and 0x1a42258(%rip),%r15d # ffffffff82ca93f4 <gfp_allowed_mask> applied my patch: ffffffff81267100 <__alloc_pages_bulk>: ffffffff81267100: e8 0b 73 df ff callq ffffffff8105e410 <__fentry__> ffffffff81267105: 55 push %rbp ffffffff81267106: 48 89 e5 mov %rsp,%rbp ffffffff81267109: 41 57 push %r15 ffffffff8126710b: 41 56 push %r14 ffffffff8126710d: 41 55 push %r13 ffffffff8126710f: 41 54 push %r12 ffffffff81267111: 53 push %rbx ffffffff81267112: 48 83 ec 60 sub $0x60,%rsp ffffffff81267116: 85 c9 test %ecx,%ecx ffffffff81267118: 89 7d c8 mov %edi,-0x38(%rbp) ffffffff8126711b: 89 75 9c mov %esi,-0x64(%rbp) ffffffff8126711e: 48 89 55 a0 mov %rdx,-0x60(%rbp) ffffffff81267122: 89 4d d0 mov %ecx,-0x30(%rbp) ffffffff81267125: 4c 89 45 a8 mov %r8,-0x58(%rbp) ffffffff81267129: 0f 8e 91 05 00 00 jle ffffffff812676c0 <__alloc_pages_bulk+0x5c0> ffffffff8126712f: 4d 85 c9 test %r9,%r9 ffffffff81267132: 4d 89 cf mov %r9,%r15 ffffffff81267135: 0f 84 4d 05 00 00 je ffffffff81267688 <__alloc_pages_bulk+0x588> ffffffff8126713b: 8d 49 ff lea -0x1(%rcx),%ecx ffffffff8126713e: 31 c0 xor %eax,%eax ffffffff81267140: eb 03 jmp ffffffff81267145 <__alloc_pages_bulk+0x45> ffffffff81267142: 48 89 d0 mov %rdx,%rax ffffffff81267145: 49 83 3c c7 00 cmpq $0x0,(%r15,%rax,8) ffffffff8126714a: 41 89 c4 mov %eax,%r12d ffffffff8126714d: 74 0d je ffffffff8126715c <__alloc_pages_bulk+0x5c> ffffffff8126714f: 44 8d 60 01 lea 0x1(%rax),%r12d ffffffff81267153: 48 39 c1 cmp %rax,%rcx ffffffff81267156: 48 8d 50 01 lea 0x1(%rax),%rdx ffffffff8126715a: 75 e6 jne ffffffff81267142 <__alloc_pages_bulk+0x42> ffffffff8126715c: 8b 45 d0 mov -0x30(%rbp),%eax ffffffff8126715f: 41 39 c4 cmp %eax,%r12d ffffffff81267162: 0f 84 19 04 00 00 je ffffffff81267581 <__alloc_pages_bulk+0x481> ffffffff81267168: 44 29 e0 sub %r12d,%eax ffffffff8126716b: 83 f8 01 cmp $0x1,%eax ffffffff8126716e: 0f 84 66 04 00 00 je ffffffff812675da <__alloc_pages_bulk+0x4da> ffffffff81267174: 48 63 45 9c movslq -0x64(%rbp),%rax ffffffff81267178: 8b 0d 62 22 a4 01 mov 0x1a42262(%rip),%ecx # ffffffff82ca93e0 <page_group_by_mobility_disabled> ffffffff8126717e: 8b 5d c8 mov -0x38(%rbp),%ebx ffffffff81267181: 23 1d 6d 22 a4 01 and 0x1a4226d(%rip),%ebx # ffffffff82ca93f4 <gfp_allowed_mask> Thanks, Yanfei > -- > Mel Gorman > SUSE Labs >
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index e9fd57ca4c1c..e25d508b85e9 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5235,16 +5235,18 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid, if (unlikely(nr_pages <= 0)) return 0; - /* - * Skip populated array elements to determine if any pages need - * to be allocated before disabling IRQs. - */ - while (page_array && nr_populated < nr_pages && page_array[nr_populated]) - nr_populated++; + if (page_array) { + /* + * Skip populated array elements to determine if any pages need + * to be allocated before disabling IRQs. + */ + while (nr_populated < nr_pages && page_array[nr_populated]) + nr_populated++; - /* Already populated array? */ - if (unlikely(page_array && nr_pages - nr_populated == 0)) - return nr_populated; + /* Already populated array? */ + if (unlikely(nr_pages - nr_populated == 0)) + return nr_populated; + } /* Use the single page allocator for one page. */ if (nr_pages - nr_populated == 1) @@ -5319,9 +5321,10 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid, local_unlock_irqrestore(&pagesets.lock, flags); - __count_zid_vm_events(PGALLOC, zone_idx(zone), nr_account); - zone_statistics(ac.preferred_zoneref->zone, zone, nr_account); - + if (likely(nr_account)) { + __count_zid_vm_events(PGALLOC, zone_idx(zone), nr_account); + zone_statistics(ac.preferred_zoneref->zone, zone, nr_account); + } return nr_populated; failed_irq:
While the nr_populated is non-zero, however the nr_account might be zero if allocating fails. In this case, not to count event can save some cycles. And this commit extract the check of "page_array" from a while statement to avoid unnecessary checks for it. Signed-off-by: Yanfei Xu <yanfei.xu@windriver.com> --- mm/page_alloc.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-)