Message ID | 20240325-slab-memcg-v2-1-900a458233a6@suse.cz (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | memcg_kmem hooks refactoring | expand |
On 25/03/2024 08:20, Vlastimil Babka wrote: > The MEMCG_KMEM integration with slab currently relies on two hooks > during allocation. memcg_slab_pre_alloc_hook() determines the objcg and > charges it, and memcg_slab_post_alloc_hook() assigns the objcg pointer > to the allocated object(s). > > As Linus pointed out, this is unnecessarily complex. Failing to charge > due to memcg limits should be rare, so we can optimistically allocate > the object(s) and do the charging together with assigning the objcg > pointer in a single post_alloc hook. In the rare case the charging > fails, we can free the object(s) back. > > This simplifies the code (no need to pass around the objcg pointer) and > potentially allows to separate charging from allocation in cases where > it's common that the allocation would be immediately freed, and the > memcg handling overhead could be saved. > > Suggested-by: Linus Torvalds <torvalds@linux-foundation.org> > Link: https://lore.kernel.org/all/CAHk-=whYOOdM7jWy5jdrAm8LxcgCMFyk2bt8fYYvZzM4U-zAQA@mail.gmail.com/ > Reviewed-by: Roman Gushchin <roman.gushchin@linux.dev> > Reviewed-by: Chengming Zhou <chengming.zhou@linux.dev> > Signed-off-by: Vlastimil Babka <vbabka@suse.cz> > --- > mm/slub.c | 180 +++++++++++++++++++++++++++----------------------------------- > 1 file changed, 77 insertions(+), 103 deletions(-) Hi Vlastimil, When running the LTP test "memcg_limit_in_bytes" against next-master (next-20240402) kernel with Arm64 on JUNO, oops is observed in our CI. I can send the full logs if required. It is observed to work fine on softiron-overdrive-3000. A bisect identified f11bb2d9d91627935c63ea3e6a031fd238c846e1 as the first bad commit. Bisected it on the tag "next-20240402" at repo "https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git". 
This works fine on Linux version v6.9-rc2 Log from failure against run on JUNO: ------------------------------------ <1>[ 6150.134750] Unable to handle kernel paging request at virtual address ffffffffc2435ec8 <1>[ 6150.143030] Mem abort info: <1>[ 6150.146137] ESR = 0x0000000096000006 <1>[ 6150.150186] EC = 0x25: DABT (current EL), IL = 32 bits <1>[ 6150.155805] SET = 0, FnV = 0 <1>[ 6150.159161] EA = 0, S1PTW = 0 <1>[ 6150.162593] FSC = 0x06: level 2 translation fault <1>[ 6150.167769] Data abort info: <1>[ 6150.170944] ISV = 0, ISS = 0x00000006, ISS2 = 0x00000000 <1>[ 6150.176729] CM = 0, WnR = 0, TnD = 0, TagAccess = 0 <1>[ 6150.182078] GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0 <1>[ 6150.187688] swapper pgtable: 4k pages, 48-bit VAs, pgdp=0000000081dca000 <1>[ 6150.194707] [ffffffffc2435ec8] pgd=0000000000000000, p4d=0000000082c52003, pud=0000000082c53003, pmd=0000000000000000 <0>[ 6150.205688] Internal error: Oops: 0000000096000006 [#1] PREEMPT SMP <4>[ 6150.212245] Modules linked in: overlay binfmt_misc btrfs blake2b_generic libcrc32c xor xor_neon raid6_pq zstd_compress fuse ip_tables x_tables ipv6 crct10dif_ce onboard_usb_dev tda998x cec hdlcd drm_dma_helper drm_kms_helper drm coresight_stm backlight coresight_tpiu coresight_replicator stm_core coresight_tmc coresight_cpu_debug coresight_cti coresight_funnel coresight smsc [last unloaded: binfmt_misc] <4>[ 6150.248579] CPU: 1 PID: 341531 Comm: memcg_process Not tainted 6.9.0-rc2-next-20240402 #1 <4>[ 6150.257056] Hardware name: ARM Juno development board (r0) (DT) <4>[ 6150.258592] thermal thermal_zone0: failed to read out thermal zone (-121) <4>[ 6150.263259] pstate: 40000005 (nZcv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) <4>[ 6150.263281] pc : memcg_alloc_abort_single+0x4c/0x140 <4>[ 6150.263317] lr : kmem_cache_alloc_noprof+0x200/0x210 <4>[ 6150.263335] sp : ffff800090d7bb10 <4>[ 6150.263345] x29: ffff800090d7bb10 x28: ffff000826cc0e40 x27: ffff000800db2280 <4>[ 6150.263382] x26: 0000ffffa404c000 x25: 
ffff000800fdf0a8 x24: 00000000000000a8 <4>[ 6150.306574] x23: ffff80008009068c x22: ffff80008029b16c x21: ffff800090d7bb90 <4>[ 6150.314026] x20: ffff000800054400 x19: ffffffffc2435ec0 x18: 0000000000000000 <4>[ 6150.321470] x17: 2020202020203635 x16: 3220202020202030 x15: 0000b5da96570c3c <4>[ 6150.328914] x14: 00000000000001d8 x13: 0000000000000000 x12: 0000000000000000 <4>[ 6150.336358] x11: 0000000000000000 x10: 0000000000000620 x9 : 0000000000000003 <4>[ 6150.343803] x8 : ffff000800db2d80 x7 : 0000000000000003 x6 : ffff00082201f000 <4>[ 6150.351247] x5 : ffffffffffffffff x4 : 0000000000000000 x3 : ffffffffffffffff <4>[ 6150.358690] x2 : ffff8008fd0a9000 x1 : 00000000f0000000 x0 : ffffc1ffc0000000 <4>[ 6150.366135] Call trace: <4>[ 6150.368853] memcg_alloc_abort_single+0x4c/0x140 <4>[ 6150.373766] kmem_cache_alloc_noprof+0x200/0x210 <4>[ 6150.378668] vm_area_alloc+0x2c/0xd4 <4>[ 6150.382531] mmap_region+0x178/0x980 <4>[ 6150.386389] do_mmap+0x3cc/0x528 <4>[ 6150.389895] vm_mmap_pgoff+0xec/0x134 <4>[ 6150.393840] ksys_mmap_pgoff+0x4c/0x204 <4>[ 6150.397955] __arm64_sys_mmap+0x30/0x44 <4>[ 6150.402082] invoke_syscall+0x48/0x114 <4>[ 6150.406119] el0_svc_common.constprop.0+0x40/0xe0 <4>[ 6150.411114] do_el0_svc+0x1c/0x28 <4>[ 6150.414716] el0_svc+0x34/0xdc <4>[ 6150.418061] el0t_64_sync_handler+0xc0/0xc4 <4>[ 6150.422532] el0t_64_sync+0x190/0x194 <0>[ 6150.426483] Code: aa1603fe d50320ff 8b131813 aa1e03f6 (f9400660) Bisect log: ---------- git bisect start # good: [39cd87c4eb2b893354f3b850f916353f2658ae6f] Linux 6.9-rc2 git bisect good 39cd87c4eb2b893354f3b850f916353f2658ae6f # bad: [c0b832517f627ead3388c6f0c74e8ac10ad5774b] Add linux-next specific files for 20240402 git bisect bad c0b832517f627ead3388c6f0c74e8ac10ad5774b # bad: [784b758e641c4b36be7ef8ab585bea834099b030] Merge branch 'for-linux-next' of https://gitlab.freedesktop.org/drm/misc/kernel.git git bisect bad 784b758e641c4b36be7ef8ab585bea834099b030 # bad: [631746aaa0999cbba47b1efc10421d8330a78de5] Merge 
branch 'xtensa-for-next' of git://github.com/jcmvbkbc/linux-xtensa.git git bisect bad 631746aaa0999cbba47b1efc10421d8330a78de5 # bad: [d4c0a0316990688c0b77de2d3f7dfc91582c46ad] Merge branch 'mm-everything' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm git bisect bad d4c0a0316990688c0b77de2d3f7dfc91582c46ad # bad: [ef4e56ae052ae57550fd24cdac78c99a36c8a20b] mm: take placement mappings gap into account git bisect bad ef4e56ae052ae57550fd24cdac78c99a36c8a20b # good: [ac3c1a2ea65b2cbefdc1f7fe3085d633ebb174c8] mm-page_isolation-prepare-for-hygienic-freelists-fix git bisect good ac3c1a2ea65b2cbefdc1f7fe3085d633ebb174c8 # bad: [f11bb2d9d91627935c63ea3e6a031fd238c846e1] mm, slab: move memcg charging to post-alloc hook git bisect bad f11bb2d9d91627935c63ea3e6a031fd238c846e1 # good: [f307051520f6860a1f21cad32b4109b201196ae9] x86: remove unneeded memblock_find_dma_reserve() git bisect good f307051520f6860a1f21cad32b4109b201196ae9 # good: [dbde2cb09dc4eaf92c80d43c9326d7dca43575f4] mm-move-follow_phys-to-arch-x86-mm-pat-memtypec-fix-2 git bisect good dbde2cb09dc4eaf92c80d43c9326d7dca43575f4 # good: [d8f80fe57b2992199744e9b2616f1a2702317c4b] mm: make folio_test_idle and folio_test_young take a const argument git bisect good d8f80fe57b2992199744e9b2616f1a2702317c4b # good: [1165b638f42a982be42792ded4f8c6f94b13f0fe] mm-convert-arch_clear_hugepage_flags-to-take-a-folio-fix git bisect good 1165b638f42a982be42792ded4f8c6f94b13f0fe # good: [f9bc35de30a88a146989601b1b2268946739f0e0] remove references to page->flags in documentation git bisect good f9bc35de30a88a146989601b1b2268946739f0e0 # good: [ea1be2228bb6d6c09b59a1f58b4b7582016825e5] proc: rewrite stable_page_flags() git bisect good ea1be2228bb6d6c09b59a1f58b4b7582016825e5 # first bad commit: [f11bb2d9d91627935c63ea3e6a031fd238c846e1] mm, slab: move memcg charging to post-alloc hook Thanks, Aishwarya
On 4/3/24 1:39 PM, Aishwarya TCV wrote: > > > On 25/03/2024 08:20, Vlastimil Babka wrote: >> The MEMCG_KMEM integration with slab currently relies on two hooks >> during allocation. memcg_slab_pre_alloc_hook() determines the objcg and >> charges it, and memcg_slab_post_alloc_hook() assigns the objcg pointer >> to the allocated object(s). >> >> As Linus pointed out, this is unnecessarily complex. Failing to charge >> due to memcg limits should be rare, so we can optimistically allocate >> the object(s) and do the charging together with assigning the objcg >> pointer in a single post_alloc hook. In the rare case the charging >> fails, we can free the object(s) back. >> >> This simplifies the code (no need to pass around the objcg pointer) and >> potentially allows to separate charging from allocation in cases where >> it's common that the allocation would be immediately freed, and the >> memcg handling overhead could be saved. >> >> Suggested-by: Linus Torvalds <torvalds@linux-foundation.org> >> Link: https://lore.kernel.org/all/CAHk-=whYOOdM7jWy5jdrAm8LxcgCMFyk2bt8fYYvZzM4U-zAQA@mail.gmail.com/ >> Reviewed-by: Roman Gushchin <roman.gushchin@linux.dev> >> Reviewed-by: Chengming Zhou <chengming.zhou@linux.dev> >> Signed-off-by: Vlastimil Babka <vbabka@suse.cz> >> --- >> mm/slub.c | 180 +++++++++++++++++++++++++++----------------------------------- >> 1 file changed, 77 insertions(+), 103 deletions(-) > > Hi Vlastimil, > > When running the LTP test "memcg_limit_in_bytes" against next-master > (next-20240402) kernel with Arm64 on JUNO, oops is observed in our CI. I > can send the full logs if required. It is observed to work fine on > softiron-overdrive-3000. > > A bisect identified 11bb2d9d91627935c63ea3e6a031fd238c846e1 as the first > bad commit. Bisected it on the tag "next-20240402" at repo > "https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git". > > This works fine on Linux version v6.9-rc2 Oops, sorry, can you verify that this fixes it? Thanks. 
----8<---- From b0597c220624fef4f10e26079a3ff1c86f02a12b Mon Sep 17 00:00:00 2001 From: Vlastimil Babka <vbabka@suse.cz> Date: Wed, 3 Apr 2024 17:45:15 +0200 Subject: [PATCH] fixup! mm, slab: move memcg charging to post-alloc hook The call to memcg_alloc_abort_single() is wrong, it expects a pointer to single object, not an array. Reported-by: Aishwarya TCV <aishwarya.tcv@arm.com> Signed-off-by: Vlastimil Babka <vbabka@suse.cz> --- mm/slub.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/slub.c b/mm/slub.c index f5b151a58b7d..b32e79629ae7 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2100,7 +2100,7 @@ bool memcg_slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru, return true; if (likely(size == 1)) { - memcg_alloc_abort_single(s, p); + memcg_alloc_abort_single(s, *p); *p = NULL; } else { kmem_cache_free_bulk(s, size, p);
On Wed, Apr 03, 2024 at 05:48:24PM +0200, Vlastimil Babka wrote: > On 4/3/24 1:39 PM, Aishwarya TCV wrote: > > > > > > On 25/03/2024 08:20, Vlastimil Babka wrote: > >> The MEMCG_KMEM integration with slab currently relies on two hooks > >> during allocation. memcg_slab_pre_alloc_hook() determines the objcg and > >> charges it, and memcg_slab_post_alloc_hook() assigns the objcg pointer > >> to the allocated object(s). > >> > >> As Linus pointed out, this is unnecessarily complex. Failing to charge > >> due to memcg limits should be rare, so we can optimistically allocate > >> the object(s) and do the charging together with assigning the objcg > >> pointer in a single post_alloc hook. In the rare case the charging > >> fails, we can free the object(s) back. > >> > >> This simplifies the code (no need to pass around the objcg pointer) and > >> potentially allows to separate charging from allocation in cases where > >> it's common that the allocation would be immediately freed, and the > >> memcg handling overhead could be saved. > >> > >> Suggested-by: Linus Torvalds <torvalds@linux-foundation.org> > >> Link: https://lore.kernel.org/all/CAHk-=whYOOdM7jWy5jdrAm8LxcgCMFyk2bt8fYYvZzM4U-zAQA@mail.gmail.com/ > >> Reviewed-by: Roman Gushchin <roman.gushchin@linux.dev> > >> Reviewed-by: Chengming Zhou <chengming.zhou@linux.dev> > >> Signed-off-by: Vlastimil Babka <vbabka@suse.cz> > >> --- > >> mm/slub.c | 180 +++++++++++++++++++++++++++----------------------------------- > >> 1 file changed, 77 insertions(+), 103 deletions(-) > > > > Hi Vlastimil, > > > > When running the LTP test "memcg_limit_in_bytes" against next-master > > (next-20240402) kernel with Arm64 on JUNO, oops is observed in our CI. I > > can send the full logs if required. It is observed to work fine on > > softiron-overdrive-3000. > > > > A bisect identified 11bb2d9d91627935c63ea3e6a031fd238c846e1 as the first > > bad commit. 
Bisected it on the tag "next-20240402" at repo > > "https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git". > > > > This works fine on Linux version v6.9-rc2 > > Oops, sorry, can you verify that this fixes it? > Thanks. > > ----8<---- > From b0597c220624fef4f10e26079a3ff1c86f02a12b Mon Sep 17 00:00:00 2001 > From: Vlastimil Babka <vbabka@suse.cz> > Date: Wed, 3 Apr 2024 17:45:15 +0200 > Subject: [PATCH] fixup! mm, slab: move memcg charging to post-alloc hook > > The call to memcg_alloc_abort_single() is wrong, it expects a pointer to > single object, not an array. > > Reported-by: Aishwarya TCV <aishwarya.tcv@arm.com> > Signed-off-by: Vlastimil Babka <vbabka@suse.cz> Oh, indeed. Reviewed-by: Roman Gushchin <roman.gushchin@linux.dev> Vlastimil, here is another small comments fixup for the same original patch: diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 0745a28782de..9bd0ffd4c547 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -353,7 +353,7 @@ static void memcg_reparent_objcgs(struct mem_cgroup *memcg, /* * A lot of the calls to the cache allocation functions are expected to be - * inlined by the compiler. Since the calls to memcg_slab_pre_alloc_hook() are + * inlined by the compiler. Since the calls to memcg_slab_post_alloc_hook() are * conditional to this static branch, we'll have to allow modules that does * kmem_cache_alloc and the such to see this symbol as well */ Thanks!
On 03/04/2024 16:48, Vlastimil Babka wrote: > On 4/3/24 1:39 PM, Aishwarya TCV wrote: >> >> >> On 25/03/2024 08:20, Vlastimil Babka wrote: >>> The MEMCG_KMEM integration with slab currently relies on two hooks >>> during allocation. memcg_slab_pre_alloc_hook() determines the objcg and >>> charges it, and memcg_slab_post_alloc_hook() assigns the objcg pointer >>> to the allocated object(s). >>> >>> As Linus pointed out, this is unnecessarily complex. Failing to charge >>> due to memcg limits should be rare, so we can optimistically allocate >>> the object(s) and do the charging together with assigning the objcg >>> pointer in a single post_alloc hook. In the rare case the charging >>> fails, we can free the object(s) back. >>> >>> This simplifies the code (no need to pass around the objcg pointer) and >>> potentially allows to separate charging from allocation in cases where >>> it's common that the allocation would be immediately freed, and the >>> memcg handling overhead could be saved. >>> >>> Suggested-by: Linus Torvalds <torvalds@linux-foundation.org> >>> Link: https://lore.kernel.org/all/CAHk-=whYOOdM7jWy5jdrAm8LxcgCMFyk2bt8fYYvZzM4U-zAQA@mail.gmail.com/ >>> Reviewed-by: Roman Gushchin <roman.gushchin@linux.dev> >>> Reviewed-by: Chengming Zhou <chengming.zhou@linux.dev> >>> Signed-off-by: Vlastimil Babka <vbabka@suse.cz> >>> --- >>> mm/slub.c | 180 +++++++++++++++++++++++++++----------------------------------- >>> 1 file changed, 77 insertions(+), 103 deletions(-) >> >> Hi Vlastimil, >> >> When running the LTP test "memcg_limit_in_bytes" against next-master >> (next-20240402) kernel with Arm64 on JUNO, oops is observed in our CI. I >> can send the full logs if required. It is observed to work fine on >> softiron-overdrive-3000. >> >> A bisect identified 11bb2d9d91627935c63ea3e6a031fd238c846e1 as the first >> bad commit. Bisected it on the tag "next-20240402" at repo >> "https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git". 
>> >> This works fine on Linux version v6.9-rc2 > > Oops, sorry, can you verify that this fixes it? > Thanks. > > ----8<---- > From b0597c220624fef4f10e26079a3ff1c86f02a12b Mon Sep 17 00:00:00 2001 > From: Vlastimil Babka <vbabka@suse.cz> > Date: Wed, 3 Apr 2024 17:45:15 +0200 > Subject: [PATCH] fixup! mm, slab: move memcg charging to post-alloc hook > > The call to memcg_alloc_abort_single() is wrong, it expects a pointer to > single object, not an array. > > Reported-by: Aishwarya TCV <aishwarya.tcv@arm.com> > Signed-off-by: Vlastimil Babka <vbabka@suse.cz> > --- > mm/slub.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/mm/slub.c b/mm/slub.c > index f5b151a58b7d..b32e79629ae7 100644 > --- a/mm/slub.c > +++ b/mm/slub.c > @@ -2100,7 +2100,7 @@ bool memcg_slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru, > return true; > > if (likely(size == 1)) { > - memcg_alloc_abort_single(s, p); > + memcg_alloc_abort_single(s, *p); > *p = NULL; > } else { > kmem_cache_free_bulk(s, size, p); Tested the attached patch on next-20240302. Confirming that the test is running fine. Test run log is attached below. 
Test run log: -------------- memcg_limit_in_bytes 8 TPASS: process 614 is killed memcg_limit_in_bytes 9 TINFO: Test limit_in_bytes will be aligned to PAGESIZE memcg_limit_in_bytes 9 TPASS: echo 4095 > memory.limit_in_bytes passed as expected memcg_limit_in_bytes 9 TPASS: input=4095, limit_in_bytes=0 memcg_limit_in_bytes 10 TPASS: echo 4097 > memory.limit_in_bytes passed as expected memcg_limit_in_bytes 10 TPASS: input=4097, limit_in_bytes=4096 memcg_limit_in_bytes 11 TPASS: echo 1 > memory.limit_in_bytes passed as expected memcg_limit_in_bytes 11 TPASS: input=1, limit_in_bytes=0 memcg_limit_in_bytes 12 TINFO: Test invalid memory.limit_in_bytes memcg_limit_in_bytes 12 TPASS: echo -1 > memory.limit_in_bytes passed as expected memcg_limit_in_bytes 13 TPASS: echo 1.0 > memory.limit_in_bytes failed as expected memcg_limit_in_bytes 14 TPASS: echo 1xx > memory.limit_in_bytes failed as expected memcg_limit_in_bytes 15 TPASS: echo xx > memory.limit_in_bytes failed as expected Summary: passed 18 failed 0 broken 0 skipped 0 warnings 0 Thanks, Aishwarya
On Mon, Mar 25, 2024 at 09:20:32AM +0100, Vlastimil Babka wrote: > The MEMCG_KMEM integration with slab currently relies on two hooks > during allocation. memcg_slab_pre_alloc_hook() determines the objcg and > charges it, and memcg_slab_post_alloc_hook() assigns the objcg pointer > to the allocated object(s). > > As Linus pointed out, this is unnecessarily complex. Failing to charge > due to memcg limits should be rare, so we can optimistically allocate > the object(s) and do the charging together with assigning the objcg > pointer in a single post_alloc hook. In the rare case the charging > fails, we can free the object(s) back. > > This simplifies the code (no need to pass around the objcg pointer) and > potentially allows to separate charging from allocation in cases where > it's common that the allocation would be immediately freed, and the > memcg handling overhead could be saved. > > Suggested-by: Linus Torvalds <torvalds@linux-foundation.org> > Link: https://lore.kernel.org/all/CAHk-=whYOOdM7jWy5jdrAm8LxcgCMFyk2bt8fYYvZzM4U-zAQA@mail.gmail.com/ > Reviewed-by: Roman Gushchin <roman.gushchin@linux.dev> > Reviewed-by: Chengming Zhou <chengming.zhou@linux.dev> > Signed-off-by: Vlastimil Babka <vbabka@suse.cz> Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
diff --git a/mm/slub.c b/mm/slub.c index 1bb2a93cf7b6..2440984503c8 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1887,23 +1887,36 @@ static inline size_t obj_full_size(struct kmem_cache *s) return s->size + sizeof(struct obj_cgroup *); } -/* - * Returns false if the allocation should fail. - */ -static bool __memcg_slab_pre_alloc_hook(struct kmem_cache *s, - struct list_lru *lru, - struct obj_cgroup **objcgp, - size_t objects, gfp_t flags) +static bool __memcg_slab_post_alloc_hook(struct kmem_cache *s, + struct list_lru *lru, + gfp_t flags, size_t size, + void **p) { + struct obj_cgroup *objcg; + struct slab *slab; + unsigned long off; + size_t i; + /* * The obtained objcg pointer is safe to use within the current scope, * defined by current task or set_active_memcg() pair. * obj_cgroup_get() is used to get a permanent reference. */ - struct obj_cgroup *objcg = current_obj_cgroup(); + objcg = current_obj_cgroup(); if (!objcg) return true; + /* + * slab_alloc_node() avoids the NULL check, so we might be called with a + * single NULL object. kmem_cache_alloc_bulk() aborts if it can't fill + * the whole requested size. 
+ * return success as there's nothing to free back + */ + if (unlikely(*p == NULL)) + return true; + + flags &= gfp_allowed_mask; + if (lru) { int ret; struct mem_cgroup *memcg; @@ -1916,71 +1929,51 @@ static bool __memcg_slab_pre_alloc_hook(struct kmem_cache *s, return false; } - if (obj_cgroup_charge(objcg, flags, objects * obj_full_size(s))) + if (obj_cgroup_charge(objcg, flags, size * obj_full_size(s))) return false; - *objcgp = objcg; + for (i = 0; i < size; i++) { + slab = virt_to_slab(p[i]); + + if (!slab_objcgs(slab) && + memcg_alloc_slab_cgroups(slab, s, flags, false)) { + obj_cgroup_uncharge(objcg, obj_full_size(s)); + continue; + } + + off = obj_to_index(s, slab, p[i]); + obj_cgroup_get(objcg); + slab_objcgs(slab)[off] = objcg; + mod_objcg_state(objcg, slab_pgdat(slab), + cache_vmstat_idx(s), obj_full_size(s)); + } + return true; } -/* - * Returns false if the allocation should fail. - */ +static void memcg_alloc_abort_single(struct kmem_cache *s, void *object); + static __fastpath_inline -bool memcg_slab_pre_alloc_hook(struct kmem_cache *s, struct list_lru *lru, - struct obj_cgroup **objcgp, size_t objects, - gfp_t flags) +bool memcg_slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru, + gfp_t flags, size_t size, void **p) { - if (!memcg_kmem_online()) + if (likely(!memcg_kmem_online())) return true; if (likely(!(flags & __GFP_ACCOUNT) && !(s->flags & SLAB_ACCOUNT))) return true; - return likely(__memcg_slab_pre_alloc_hook(s, lru, objcgp, objects, - flags)); -} - -static void __memcg_slab_post_alloc_hook(struct kmem_cache *s, - struct obj_cgroup *objcg, - gfp_t flags, size_t size, - void **p) -{ - struct slab *slab; - unsigned long off; - size_t i; - - flags &= gfp_allowed_mask; - - for (i = 0; i < size; i++) { - if (likely(p[i])) { - slab = virt_to_slab(p[i]); - - if (!slab_objcgs(slab) && - memcg_alloc_slab_cgroups(slab, s, flags, false)) { - obj_cgroup_uncharge(objcg, obj_full_size(s)); - continue; - } + if 
(likely(__memcg_slab_post_alloc_hook(s, lru, flags, size, p))) + return true; - off = obj_to_index(s, slab, p[i]); - obj_cgroup_get(objcg); - slab_objcgs(slab)[off] = objcg; - mod_objcg_state(objcg, slab_pgdat(slab), - cache_vmstat_idx(s), obj_full_size(s)); - } else { - obj_cgroup_uncharge(objcg, obj_full_size(s)); - } + if (likely(size == 1)) { + memcg_alloc_abort_single(s, p); + *p = NULL; + } else { + kmem_cache_free_bulk(s, size, p); } -} - -static __fastpath_inline -void memcg_slab_post_alloc_hook(struct kmem_cache *s, struct obj_cgroup *objcg, - gfp_t flags, size_t size, void **p) -{ - if (likely(!memcg_kmem_online() || !objcg)) - return; - return __memcg_slab_post_alloc_hook(s, objcg, flags, size, p); + return false; } static void __memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab, @@ -2019,44 +2012,23 @@ void memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab, void **p, __memcg_slab_free_hook(s, slab, p, objects, objcgs); } - -static inline -void memcg_slab_alloc_error_hook(struct kmem_cache *s, int objects, - struct obj_cgroup *objcg) -{ - if (objcg) - obj_cgroup_uncharge(objcg, objects * obj_full_size(s)); -} #else /* CONFIG_MEMCG_KMEM */ static inline void memcg_free_slab_cgroups(struct slab *slab) { } -static inline bool memcg_slab_pre_alloc_hook(struct kmem_cache *s, - struct list_lru *lru, - struct obj_cgroup **objcgp, - size_t objects, gfp_t flags) -{ - return true; -} - -static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s, - struct obj_cgroup *objcg, +static inline bool memcg_slab_post_alloc_hook(struct kmem_cache *s, + struct list_lru *lru, gfp_t flags, size_t size, void **p) { + return true; } static inline void memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab, void **p, int objects) { } - -static inline -void memcg_slab_alloc_error_hook(struct kmem_cache *s, int objects, - struct obj_cgroup *objcg) -{ -} #endif /* CONFIG_MEMCG_KMEM */ /* @@ -3736,10 +3708,7 @@ noinline int should_failslab(struct 
kmem_cache *s, gfp_t gfpflags) ALLOW_ERROR_INJECTION(should_failslab, ERRNO); static __fastpath_inline -struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s, - struct list_lru *lru, - struct obj_cgroup **objcgp, - size_t size, gfp_t flags) +struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags) { flags &= gfp_allowed_mask; @@ -3748,14 +3717,11 @@ struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s, if (unlikely(should_failslab(s, flags))) return NULL; - if (unlikely(!memcg_slab_pre_alloc_hook(s, lru, objcgp, size, flags))) - return NULL; - return s; } static __fastpath_inline -void slab_post_alloc_hook(struct kmem_cache *s, struct obj_cgroup *objcg, +bool slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru, gfp_t flags, size_t size, void **p, bool init, unsigned int orig_size) { @@ -3804,7 +3770,7 @@ void slab_post_alloc_hook(struct kmem_cache *s, struct obj_cgroup *objcg, kmsan_slab_alloc(s, p[i], init_flags); } - memcg_slab_post_alloc_hook(s, objcg, flags, size, p); + return memcg_slab_post_alloc_hook(s, lru, flags, size, p); } /* @@ -3821,10 +3787,9 @@ static __fastpath_inline void *slab_alloc_node(struct kmem_cache *s, struct list gfp_t gfpflags, int node, unsigned long addr, size_t orig_size) { void *object; - struct obj_cgroup *objcg = NULL; bool init = false; - s = slab_pre_alloc_hook(s, lru, &objcg, 1, gfpflags); + s = slab_pre_alloc_hook(s, gfpflags); if (unlikely(!s)) return NULL; @@ -3841,8 +3806,10 @@ static __fastpath_inline void *slab_alloc_node(struct kmem_cache *s, struct list /* * When init equals 'true', like for kzalloc() family, only * @orig_size bytes might be zeroed instead of s->object_size + * In case this fails due to memcg_slab_post_alloc_hook(), + * object is set to NULL */ - slab_post_alloc_hook(s, objcg, gfpflags, 1, &object, init, orig_size); + slab_post_alloc_hook(s, lru, gfpflags, 1, &object, init, orig_size); return object; } @@ -4281,6 +4248,16 @@ void slab_free(struct kmem_cache *s, 
struct slab *slab, void *object, do_slab_free(s, slab, object, object, 1, addr); } +#ifdef CONFIG_MEMCG_KMEM +/* Do not inline the rare memcg charging failed path into the allocation path */ +static noinline +void memcg_alloc_abort_single(struct kmem_cache *s, void *object) +{ + if (likely(slab_free_hook(s, object, slab_want_init_on_free(s)))) + do_slab_free(s, virt_to_slab(object), object, object, 1, _RET_IP_); +} +#endif + static __fastpath_inline void slab_free_bulk(struct kmem_cache *s, struct slab *slab, void *head, void *tail, void **p, int cnt, unsigned long addr) @@ -4616,29 +4593,26 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, void **p) { int i; - struct obj_cgroup *objcg = NULL; if (!size) return 0; - /* memcg and kmem_cache debug support */ - s = slab_pre_alloc_hook(s, NULL, &objcg, size, flags); + s = slab_pre_alloc_hook(s, flags); if (unlikely(!s)) return 0; i = __kmem_cache_alloc_bulk(s, flags, size, p); + if (unlikely(i == 0)) + return 0; /* * memcg and kmem_cache debug support and memory initialization. * Done outside of the IRQ disabled fastpath loop. */ - if (likely(i != 0)) { - slab_post_alloc_hook(s, objcg, flags, size, p, - slab_want_init_on_alloc(flags, s), s->object_size); - } else { - memcg_slab_alloc_error_hook(s, size, objcg); + if (unlikely(!slab_post_alloc_hook(s, NULL, flags, size, p, + slab_want_init_on_alloc(flags, s), s->object_size))) { + return 0; } - return i; } EXPORT_SYMBOL(kmem_cache_alloc_bulk);