Message ID | 20220104001046.12263-24-vbabka@suse.cz (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | Separate struct slab from struct page | expand |
On Tue, Jan 04, 2022 at 01:10:37AM +0100, Vlastimil Babka wrote: > page->memcg_data is used with MEMCG_DATA_OBJCGS flag only for slab pages > so convert all the related infrastructure to struct slab. Also use > struct folio instead of struct page when resolving object pointers. > > This is not just mechanistic changing of types and names. Now in > mem_cgroup_from_obj() we use folio_test_slab() to decide if we interpret > the folio as a real slab instead of a large kmalloc, instead of relying > on MEMCG_DATA_OBJCGS bit that used to be checked in page_objcgs_check(). > Similarly in memcg_slab_free_hook() where we can encounter > kmalloc_large() pages (here the folio slab flag check is implied by > virt_to_slab()). As a result, page_objcgs_check() can be dropped instead > of converted. > > To avoid include cycles, move the inline definition of slab_objcgs() > from memcontrol.h to mm/slab.h. > > Signed-off-by: Vlastimil Babka <vbabka@suse.cz> > Cc: Johannes Weiner <hannes@cmpxchg.org> > Cc: Michal Hocko <mhocko@kernel.org> > Cc: Vladimir Davydov <vdavydov.dev@gmail.com> > Cc: <cgroups@vger.kernel.org> > --- > include/linux/memcontrol.h | 48 ------------------------ > mm/memcontrol.c | 47 ++++++++++++----------- > mm/slab.h | 76 ++++++++++++++++++++++++++------------ > 3 files changed, 79 insertions(+), 92 deletions(-) > > diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h > index 0c5c403f4be6..e34112f6a369 100644 > --- a/include/linux/memcontrol.h > +++ b/include/linux/memcontrol.h > @@ -536,45 +536,6 @@ static inline bool folio_memcg_kmem(struct folio *folio) > return folio->memcg_data & MEMCG_DATA_KMEM; > } > > -/* > - * page_objcgs - get the object cgroups vector associated with a page > - * @page: a pointer to the page struct > - * > - * Returns a pointer to the object cgroups vector associated with the page, > - * or NULL. This function assumes that the page is known to have an > - * associated object cgroups vector. It's not safe to call this function > - * against pages, which might have an associated memory cgroup: e.g. > - * kernel stack pages. > - */ > -static inline struct obj_cgroup **page_objcgs(struct page *page) > -{ > - unsigned long memcg_data = READ_ONCE(page->memcg_data); > - > - VM_BUG_ON_PAGE(memcg_data && !(memcg_data & MEMCG_DATA_OBJCGS), page); > - VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_KMEM, page); > - > - return (struct obj_cgroup **)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); > -} > - > -/* > - * page_objcgs_check - get the object cgroups vector associated with a page > - * @page: a pointer to the page struct > - * > - * Returns a pointer to the object cgroups vector associated with the page, > - * or NULL. This function is safe to use if the page can be directly associated > - * with a memory cgroup. > - */ > -static inline struct obj_cgroup **page_objcgs_check(struct page *page) > -{ > - unsigned long memcg_data = READ_ONCE(page->memcg_data); > - > - if (!memcg_data || !(memcg_data & MEMCG_DATA_OBJCGS)) > - return NULL; > - > - VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_KMEM, page); > - > - return (struct obj_cgroup **)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); > -} > > #else > static inline bool folio_memcg_kmem(struct folio *folio) > @@ -582,15 +543,6 @@ static inline bool folio_memcg_kmem(struct folio *folio) > return false; > } > > -static inline struct obj_cgroup **page_objcgs(struct page *page) > -{ > - return NULL; > -} > - > -static inline struct obj_cgroup **page_objcgs_check(struct page *page) > -{ > - return NULL; > -} > #endif > > static inline bool PageMemcgKmem(struct page *page) > diff --git a/mm/memcontrol.c b/mm/memcontrol.c > index f7b789e692a0..f4fdd5675991 100644 > --- a/mm/memcontrol.c > +++ b/mm/memcontrol.c > @@ -2816,31 +2816,31 @@ static inline void mod_objcg_mlstate(struct obj_cgroup *objcg, > rcu_read_unlock(); > } > > -int memcg_alloc_page_obj_cgroups(struct page *page, struct kmem_cache *s, > - gfp_t gfp, bool new_page) > +int memcg_alloc_slab_cgroups(struct slab *slab, struct kmem_cache *s, > + gfp_t gfp, bool new_slab) > { > - unsigned int objects = objs_per_slab(s, page_slab(page)); > + unsigned int objects = objs_per_slab(s, slab); > unsigned long memcg_data; > void *vec; > > gfp &= ~OBJCGS_CLEAR_MASK; > vec = kcalloc_node(objects, sizeof(struct obj_cgroup *), gfp, > - page_to_nid(page)); > + slab_nid(slab)); > if (!vec) > return -ENOMEM; > > memcg_data = (unsigned long) vec | MEMCG_DATA_OBJCGS; > - if (new_page) { > + if (new_slab) { > /* > - * If the slab page is brand new and nobody can yet access > - * it's memcg_data, no synchronization is required and > - * memcg_data can be simply assigned. > + * If the slab is brand new and nobody can yet access its > + * memcg_data, no synchronization is required and memcg_data can > + * be simply assigned. > */ > - page->memcg_data = memcg_data; > - } else if (cmpxchg(&page->memcg_data, 0, memcg_data)) { > + slab->memcg_data = memcg_data; > + } else if (cmpxchg(&slab->memcg_data, 0, memcg_data)) { > /* > - * If the slab page is already in use, somebody can allocate > - * and assign obj_cgroups in parallel. In this case the existing > + * If the slab is already in use, somebody can allocate and > + * assign obj_cgroups in parallel. In this case the existing > * objcg vector should be reused. > */ > kfree(vec); > @@ -2865,26 +2865,31 @@ int memcg_alloc_page_obj_cgroups(struct page *page, struct kmem_cache *s, > */ > struct mem_cgroup *mem_cgroup_from_obj(void *p) > { > - struct page *page; > + struct folio *folio; > > if (mem_cgroup_disabled()) > return NULL; > > - page = virt_to_head_page(p); > + folio = virt_to_folio(p); > > /* > * Slab objects are accounted individually, not per-page. > * Memcg membership data for each individual object is saved in > * the page->obj_cgroups. ^^^^^^^^^^^^^^^^^ slab->memcg_data > */ > - if (page_objcgs_check(page)) { > - struct obj_cgroup *objcg; > + if (folio_test_slab(folio)) { > + struct obj_cgroup **objcgs; > + struct slab *slab; > unsigned int off; > > - off = obj_to_index(page->slab_cache, page_slab(page), p); > - objcg = page_objcgs(page)[off]; > - if (objcg) > - return obj_cgroup_memcg(objcg); > + slab = folio_slab(folio); > + objcgs = slab_objcgs(slab); > + if (!objcgs) > + return NULL; > + > + off = obj_to_index(slab->slab_cache, slab, p); > + if (objcgs[off]) > + return obj_cgroup_memcg(objcgs[off]); > > return NULL; > } There is a comment below, which needs some changes: /* * page_memcg_check() is used here, because page_has_obj_cgroups() * check above could fail because the object cgroups vector wasn't set * at that moment, but it can be set concurrently. * page_memcg_check(page) will guarantee that a proper memory * cgroup pointer or NULL will be returned. */ In reality the folio's slab flag can be cleared before releasing the objcgs \ vector. It seems that there is no such possibility at setting the flag, it's always set before allocating and assigning the objcg vector. > @@ -2896,7 +2901,7 @@ struct mem_cgroup *mem_cgroup_from_obj(void *p) > * page_memcg_check(page) will guarantee that a proper memory > * cgroup pointer or NULL will be returned. > */ > - return page_memcg_check(page); > + return page_memcg_check(folio_page(folio, 0)); > } > > __always_inline struct obj_cgroup *get_obj_cgroup_from_current(void) > diff --git a/mm/slab.h b/mm/slab.h > index bca9181e96d7..36e0022d8267 100644 > --- a/mm/slab.h > +++ b/mm/slab.h > @@ -412,15 +412,36 @@ static inline bool kmem_cache_debug_flags(struct kmem_cache *s, slab_flags_t fla > } > > #ifdef CONFIG_MEMCG_KMEM > -int memcg_alloc_page_obj_cgroups(struct page *page, struct kmem_cache *s, > - gfp_t gfp, bool new_page); > +/* > + * slab_objcgs - get the object cgroups vector associated with a slab > + * @slab: a pointer to the slab struct > + * > + * Returns a pointer to the object cgroups vector associated with the slab, > + * or NULL. This function assumes that the slab is known to have an > + * associated object cgroups vector. It's not safe to call this function > + * against slabs with underlying pages, which might have an associated memory > + * cgroup: e.g. kernel stack pages. Hm, is it still true? I don't think so. It must be safe to call it for any slab now. The rest looks good to me, please feel free to add Reviewed-by: Roman Gushchin <guro@fb.com> after fixing these comments. Thanks!
On Tue, Jan 04, 2022 at 01:10:37AM +0100, Vlastimil Babka wrote: > page->memcg_data is used with MEMCG_DATA_OBJCGS flag only for slab pages > so convert all the related infrastructure to struct slab. Also use > struct folio instead of struct page when resolving object pointers. > > This is not just mechanistic changing of types and names. Now in > mem_cgroup_from_obj() we use folio_test_slab() to decide if we interpret > the folio as a real slab instead of a large kmalloc, instead of relying > on MEMCG_DATA_OBJCGS bit that used to be checked in page_objcgs_check(). > Similarly in memcg_slab_free_hook() where we can encounter > kmalloc_large() pages (here the folio slab flag check is implied by > virt_to_slab()). As a result, page_objcgs_check() can be dropped instead > of converted. Btw, it seems that with some minimal changes we can drop the whole thing with the changing of the lower bit and rely on the slab page flag. I'll prepare a patch on top of your series. Thanks!
On 1/5/22 03:41, Roman Gushchin wrote: > On Tue, Jan 04, 2022 at 01:10:37AM +0100, Vlastimil Babka wrote: >> page->memcg_data is used with MEMCG_DATA_OBJCGS flag only for slab pages >> so convert all the related infrastructure to struct slab. Also use >> struct folio instead of struct page when resolving object pointers. >> >> This is not just mechanistic changing of types and names. Now in >> mem_cgroup_from_obj() we use folio_test_slab() to decide if we interpret >> the folio as a real slab instead of a large kmalloc, instead of relying >> on MEMCG_DATA_OBJCGS bit that used to be checked in page_objcgs_check(). >> Similarly in memcg_slab_free_hook() where we can encounter >> kmalloc_large() pages (here the folio slab flag check is implied by >> virt_to_slab()). As a result, page_objcgs_check() can be dropped instead >> of converted. >> >> To avoid include cycles, move the inline definition of slab_objcgs() >> from memcontrol.h to mm/slab.h. >> >> Signed-off-by: Vlastimil Babka <vbabka@suse.cz> >> Cc: Johannes Weiner <hannes@cmpxchg.org> >> Cc: Michal Hocko <mhocko@kernel.org> >> Cc: Vladimir Davydov <vdavydov.dev@gmail.com> >> Cc: <cgroups@vger.kernel.org> >> /* >> * Slab objects are accounted individually, not per-page. >> * Memcg membership data for each individual object is saved in >> * the page->obj_cgroups. > ^^^^^^^^^^^^^^^^^ > slab->memcg_data Good catch, fixed. >> */ >> - if (page_objcgs_check(page)) { >> - struct obj_cgroup *objcg; >> + if (folio_test_slab(folio)) { >> + struct obj_cgroup **objcgs; >> + struct slab *slab; >> unsigned int off; >> >> - off = obj_to_index(page->slab_cache, page_slab(page), p); >> - objcg = page_objcgs(page)[off]; >> - if (objcg) >> - return obj_cgroup_memcg(objcg); >> + slab = folio_slab(folio); >> + objcgs = slab_objcgs(slab); >> + if (!objcgs) >> + return NULL; >> + >> + off = obj_to_index(slab->slab_cache, slab, p); >> + if (objcgs[off]) >> + return obj_cgroup_memcg(objcgs[off]); >> >> return NULL; >> } > > There is a comment below, which needs some changes: > /* > * page_memcg_check() is used here, because page_has_obj_cgroups() > * check above could fail because the object cgroups vector wasn't set > * at that moment, but it can be set concurrently. > * page_memcg_check(page) will guarantee that a proper memory > * cgroup pointer or NULL will be returned. > */ > > In reality the folio's slab flag can be cleared before releasing the objcgs \ > vector. It seems that there is no such possibility at setting the flag, > it's always set before allocating and assigning the objcg vector. You're right. I'm changing it to: * page_memcg_check() is used here, because in theory we can encounter * a folio where the slab flag has been cleared already, but * slab->memcg_data has not been freed yet * page_memcg_check(page) will guarantee that a proper memory * cgroup pointer or NULL will be returned. I wrote "in theory" because AFAICS it implies a race as we would have to be freeing a slab and at the same time query an object address. We probably could have used the non-check version, but at this point I don't want to make any functional changes besides these comment fixes. I assume your patch on top would cover it? >> @@ -2896,7 +2901,7 @@ struct mem_cgroup *mem_cgroup_from_obj(void *p) >> * page_memcg_check(page) will guarantee that a proper memory >> * cgroup pointer or NULL will be returned. >> */ >> - return page_memcg_check(page); >> + return page_memcg_check(folio_page(folio, 0)); >> } >> >> __always_inline struct obj_cgroup *get_obj_cgroup_from_current(void) >> diff --git a/mm/slab.h b/mm/slab.h >> index bca9181e96d7..36e0022d8267 100644 >> --- a/mm/slab.h >> +++ b/mm/slab.h >> @@ -412,15 +412,36 @@ static inline bool kmem_cache_debug_flags(struct kmem_cache *s, slab_flags_t fla >> } >> >> #ifdef CONFIG_MEMCG_KMEM >> -int memcg_alloc_page_obj_cgroups(struct page *page, struct kmem_cache *s, >> - gfp_t gfp, bool new_page); >> +/* >> + * slab_objcgs - get the object cgroups vector associated with a slab >> + * @slab: a pointer to the slab struct >> + * >> + * Returns a pointer to the object cgroups vector associated with the slab, >> + * or NULL. This function assumes that the slab is known to have an >> + * associated object cgroups vector. It's not safe to call this function >> + * against slabs with underlying pages, which might have an associated memory >> + * cgroup: e.g. kernel stack pages. > > Hm, is it still true? I don't think so. It must be safe to call it for any > slab now. Right, forgot to update after removing the _check variant. Changing to: * Returns a pointer to the object cgroups vector associated with the slab, * or NULL if no such vector has been associated yet. > The rest looks good to me, please feel free to add > Reviewed-by: Roman Gushchin <guro@fb.com> > after fixing these comments. Thanks! > Thanks!
On Wed, Jan 05, 2022 at 06:08:45PM +0100, Vlastimil Babka wrote: > On 1/5/22 03:41, Roman Gushchin wrote: > > On Tue, Jan 04, 2022 at 01:10:37AM +0100, Vlastimil Babka wrote: > >> page->memcg_data is used with MEMCG_DATA_OBJCGS flag only for slab pages > >> so convert all the related infrastructure to struct slab. Also use > >> struct folio instead of struct page when resolving object pointers. > >> > >> This is not just mechanistic changing of types and names. Now in > >> mem_cgroup_from_obj() we use folio_test_slab() to decide if we interpret > >> the folio as a real slab instead of a large kmalloc, instead of relying > >> on MEMCG_DATA_OBJCGS bit that used to be checked in page_objcgs_check(). > >> Similarly in memcg_slab_free_hook() where we can encounter > >> kmalloc_large() pages (here the folio slab flag check is implied by > >> virt_to_slab()). As a result, page_objcgs_check() can be dropped instead > >> of converted. > >> > >> To avoid include cycles, move the inline definition of slab_objcgs() > >> from memcontrol.h to mm/slab.h. > >> > >> Signed-off-by: Vlastimil Babka <vbabka@suse.cz> > >> Cc: Johannes Weiner <hannes@cmpxchg.org> > >> Cc: Michal Hocko <mhocko@kernel.org> > >> Cc: Vladimir Davydov <vdavydov.dev@gmail.com> > >> Cc: <cgroups@vger.kernel.org> > >> /* > >> * Slab objects are accounted individually, not per-page. > >> * Memcg membership data for each individual object is saved in > >> * the page->obj_cgroups. > > ^^^^^^^^^^^^^^^^^ > > slab->memcg_data > > Good catch, fixed. > > >> */ > >> - if (page_objcgs_check(page)) { > >> - struct obj_cgroup *objcg; > >> + if (folio_test_slab(folio)) { > >> + struct obj_cgroup **objcgs; > >> + struct slab *slab; > >> unsigned int off; > >> > >> - off = obj_to_index(page->slab_cache, page_slab(page), p); > >> - objcg = page_objcgs(page)[off]; > >> - if (objcg) > >> - return obj_cgroup_memcg(objcg); > >> + slab = folio_slab(folio); > >> + objcgs = slab_objcgs(slab); > >> + if (!objcgs) > >> + return NULL; > >> + > >> + off = obj_to_index(slab->slab_cache, slab, p); > >> + if (objcgs[off]) > >> + return obj_cgroup_memcg(objcgs[off]); > >> > >> return NULL; > >> } > > > > There is a comment below, which needs some changes: > > /* > > * page_memcg_check() is used here, because page_has_obj_cgroups() > > * check above could fail because the object cgroups vector wasn't set > > * at that moment, but it can be set concurrently. > > * page_memcg_check(page) will guarantee that a proper memory > > * cgroup pointer or NULL will be returned. > > */ > > > > In reality the folio's slab flag can be cleared before releasing the objcgs \ > > vector. It seems that there is no such possibility at setting the flag, > > it's always set before allocating and assigning the objcg vector. > > You're right. I'm changing it to: > > * page_memcg_check() is used here, because in theory we can encounter > * a folio where the slab flag has been cleared already, but > * slab->memcg_data has not been freed yet > * page_memcg_check(page) will guarantee that a proper memory > * cgroup pointer or NULL will be returned. > > I wrote "in theory" because AFAICS it implies a race as we would have to be > freeing a slab and at the same time query an object address. We probably > could have used the non-check version, but at this point I don't want to > make any functional changes besides these comment fixes. Sounds good to me. > > I assume your patch on top would cover it? I tried to master it and remembered why we have this bit in place: there is a /proc/kpagecgroup interface which just scans over pages and reads their memcg data. It has zero control over the lifetime of pages, so it's prone to all kinds of races with setting and clearing the slab flag. So it's probably better to leave the MEMCG_DATA_OBJCGS bit in place. > > >> @@ -2896,7 +2901,7 @@ struct mem_cgroup *mem_cgroup_from_obj(void *p) > >> * page_memcg_check(page) will guarantee that a proper memory > >> * cgroup pointer or NULL will be returned. > >> */ > >> - return page_memcg_check(page); > >> + return page_memcg_check(folio_page(folio, 0)); > >> } > >> > >> __always_inline struct obj_cgroup *get_obj_cgroup_from_current(void) > >> diff --git a/mm/slab.h b/mm/slab.h > >> index bca9181e96d7..36e0022d8267 100644 > >> --- a/mm/slab.h > >> +++ b/mm/slab.h > >> @@ -412,15 +412,36 @@ static inline bool kmem_cache_debug_flags(struct kmem_cache *s, slab_flags_t fla > >> } > >> > >> #ifdef CONFIG_MEMCG_KMEM > >> -int memcg_alloc_page_obj_cgroups(struct page *page, struct kmem_cache *s, > >> - gfp_t gfp, bool new_page); > >> +/* > >> + * slab_objcgs - get the object cgroups vector associated with a slab > >> + * @slab: a pointer to the slab struct > >> + * > >> + * Returns a pointer to the object cgroups vector associated with the slab, > >> + * or NULL. This function assumes that the slab is known to have an > >> + * associated object cgroups vector. It's not safe to call this function > >> + * against slabs with underlying pages, which might have an associated memory > >> + * cgroup: e.g. kernel stack pages. > > > > Hm, is it still true? I don't think so. It must be safe to call it for any > > slab now. > > Right, forgot to update after removing the _check variant. > Changing to: > > * Returns a pointer to the object cgroups vector associated with the slab, > * or NULL if no such vector has been associated yet. Perfect! Thanks!
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 0c5c403f4be6..e34112f6a369 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -536,45 +536,6 @@ static inline bool folio_memcg_kmem(struct folio *folio) return folio->memcg_data & MEMCG_DATA_KMEM; } -/* - * page_objcgs - get the object cgroups vector associated with a page - * @page: a pointer to the page struct - * - * Returns a pointer to the object cgroups vector associated with the page, - * or NULL. This function assumes that the page is known to have an - * associated object cgroups vector. It's not safe to call this function - * against pages, which might have an associated memory cgroup: e.g. - * kernel stack pages. - */ -static inline struct obj_cgroup **page_objcgs(struct page *page) -{ - unsigned long memcg_data = READ_ONCE(page->memcg_data); - - VM_BUG_ON_PAGE(memcg_data && !(memcg_data & MEMCG_DATA_OBJCGS), page); - VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_KMEM, page); - - return (struct obj_cgroup **)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); -} - -/* - * page_objcgs_check - get the object cgroups vector associated with a page - * @page: a pointer to the page struct - * - * Returns a pointer to the object cgroups vector associated with the page, - * or NULL. This function is safe to use if the page can be directly associated - * with a memory cgroup. - */ -static inline struct obj_cgroup **page_objcgs_check(struct page *page) -{ - unsigned long memcg_data = READ_ONCE(page->memcg_data); - - if (!memcg_data || !(memcg_data & MEMCG_DATA_OBJCGS)) - return NULL; - - VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_KMEM, page); - - return (struct obj_cgroup **)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); -} #else static inline bool folio_memcg_kmem(struct folio *folio) @@ -582,15 +543,6 @@ static inline bool folio_memcg_kmem(struct folio *folio) return false; } -static inline struct obj_cgroup **page_objcgs(struct page *page) -{ - return NULL; -} - -static inline struct obj_cgroup **page_objcgs_check(struct page *page) -{ - return NULL; -} #endif static inline bool PageMemcgKmem(struct page *page) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index f7b789e692a0..f4fdd5675991 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2816,31 +2816,31 @@ static inline void mod_objcg_mlstate(struct obj_cgroup *objcg, rcu_read_unlock(); } -int memcg_alloc_page_obj_cgroups(struct page *page, struct kmem_cache *s, - gfp_t gfp, bool new_page) +int memcg_alloc_slab_cgroups(struct slab *slab, struct kmem_cache *s, + gfp_t gfp, bool new_slab) { - unsigned int objects = objs_per_slab(s, page_slab(page)); + unsigned int objects = objs_per_slab(s, slab); unsigned long memcg_data; void *vec; gfp &= ~OBJCGS_CLEAR_MASK; vec = kcalloc_node(objects, sizeof(struct obj_cgroup *), gfp, - page_to_nid(page)); + slab_nid(slab)); if (!vec) return -ENOMEM; memcg_data = (unsigned long) vec | MEMCG_DATA_OBJCGS; - if (new_page) { + if (new_slab) { /* - * If the slab page is brand new and nobody can yet access - * it's memcg_data, no synchronization is required and - * memcg_data can be simply assigned. + * If the slab is brand new and nobody can yet access its + * memcg_data, no synchronization is required and memcg_data can + * be simply assigned. */ - page->memcg_data = memcg_data; - } else if (cmpxchg(&page->memcg_data, 0, memcg_data)) { + slab->memcg_data = memcg_data; + } else if (cmpxchg(&slab->memcg_data, 0, memcg_data)) { /* - * If the slab page is already in use, somebody can allocate - * and assign obj_cgroups in parallel. In this case the existing + * If the slab is already in use, somebody can allocate and + * assign obj_cgroups in parallel. In this case the existing * objcg vector should be reused. */ kfree(vec); @@ -2865,26 +2865,31 @@ int memcg_alloc_page_obj_cgroups(struct page *page, struct kmem_cache *s, */ struct mem_cgroup *mem_cgroup_from_obj(void *p) { - struct page *page; + struct folio *folio; if (mem_cgroup_disabled()) return NULL; - page = virt_to_head_page(p); + folio = virt_to_folio(p); /* * Slab objects are accounted individually, not per-page. * Memcg membership data for each individual object is saved in * the page->obj_cgroups. */ - if (page_objcgs_check(page)) { - struct obj_cgroup *objcg; + if (folio_test_slab(folio)) { + struct obj_cgroup **objcgs; + struct slab *slab; unsigned int off; - off = obj_to_index(page->slab_cache, page_slab(page), p); - objcg = page_objcgs(page)[off]; - if (objcg) - return obj_cgroup_memcg(objcg); + slab = folio_slab(folio); + objcgs = slab_objcgs(slab); + if (!objcgs) + return NULL; + + off = obj_to_index(slab->slab_cache, slab, p); + if (objcgs[off]) + return obj_cgroup_memcg(objcgs[off]); return NULL; } @@ -2896,7 +2901,7 @@ struct mem_cgroup *mem_cgroup_from_obj(void *p) * page_memcg_check(page) will guarantee that a proper memory * cgroup pointer or NULL will be returned. */ - return page_memcg_check(page); + return page_memcg_check(folio_page(folio, 0)); } __always_inline struct obj_cgroup *get_obj_cgroup_from_current(void) diff --git a/mm/slab.h b/mm/slab.h index bca9181e96d7..36e0022d8267 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -412,15 +412,36 @@ static inline bool kmem_cache_debug_flags(struct kmem_cache *s, slab_flags_t fla } #ifdef CONFIG_MEMCG_KMEM -int memcg_alloc_page_obj_cgroups(struct page *page, struct kmem_cache *s, - gfp_t gfp, bool new_page); +/* + * slab_objcgs - get the object cgroups vector associated with a slab + * @slab: a pointer to the slab struct + * + * Returns a pointer to the object cgroups vector associated with the slab, + * or NULL. This function assumes that the slab is known to have an + * associated object cgroups vector. It's not safe to call this function + * against slabs with underlying pages, which might have an associated memory + * cgroup: e.g. kernel stack pages. + */ +static inline struct obj_cgroup **slab_objcgs(struct slab *slab) +{ + unsigned long memcg_data = READ_ONCE(slab->memcg_data); + + VM_BUG_ON_PAGE(memcg_data && !(memcg_data & MEMCG_DATA_OBJCGS), + slab_page(slab)); + VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_KMEM, slab_page(slab)); + + return (struct obj_cgroup **)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); +} + +int memcg_alloc_slab_cgroups(struct slab *slab, struct kmem_cache *s, + gfp_t gfp, bool new_slab); void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat, enum node_stat_item idx, int nr); -static inline void memcg_free_page_obj_cgroups(struct page *page) +static inline void memcg_free_slab_cgroups(struct slab *slab) { - kfree(page_objcgs(page)); - page->memcg_data = 0; + kfree(slab_objcgs(slab)); + slab->memcg_data = 0; } static inline size_t obj_full_size(struct kmem_cache *s) @@ -465,7 +486,7 @@ static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, size_t size, void **p) { - struct page *page; + struct slab *slab; unsigned long off; size_t i; @@ -474,19 +495,19 @@ static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s, for (i = 0; i < size; i++) { if (likely(p[i])) { - page = virt_to_head_page(p[i]); + slab = virt_to_slab(p[i]); - if (!page_objcgs(page) && - memcg_alloc_page_obj_cgroups(page, s, flags, + if (!slab_objcgs(slab) && + memcg_alloc_slab_cgroups(slab, s, flags, false)) { obj_cgroup_uncharge(objcg, obj_full_size(s)); continue; } - off = obj_to_index(s, page_slab(page), p[i]); + off = obj_to_index(s, slab, p[i]); obj_cgroup_get(objcg); - page_objcgs(page)[off] = objcg; - mod_objcg_state(objcg, page_pgdat(page), + slab_objcgs(slab)[off] = objcg; + mod_objcg_state(objcg, slab_pgdat(slab), cache_vmstat_idx(s), obj_full_size(s)); } else { obj_cgroup_uncharge(objcg, obj_full_size(s)); @@ -501,7 +522,7 @@ static inline void memcg_slab_free_hook(struct kmem_cache *s_orig, struct kmem_cache *s; struct obj_cgroup **objcgs; struct obj_cgroup *objcg; - struct page *page; + struct slab *slab; unsigned int off; int i; @@ -512,43 +533,52 @@ static inline void memcg_slab_free_hook(struct kmem_cache *s_orig, if (unlikely(!p[i])) continue; - page = virt_to_head_page(p[i]); - objcgs = page_objcgs_check(page); + slab = virt_to_slab(p[i]); + /* we could be given a kmalloc_large() object, skip those */ + if (!slab) + continue; + + objcgs = slab_objcgs(slab); if (!objcgs) continue; if (!s_orig) - s = page->slab_cache; + s = slab->slab_cache; else s = s_orig; - off = obj_to_index(s, page_slab(page), p[i]); + off = obj_to_index(s, slab, p[i]); objcg = objcgs[off]; if (!objcg) continue; objcgs[off] = NULL; obj_cgroup_uncharge(objcg, obj_full_size(s)); - mod_objcg_state(objcg, page_pgdat(page), cache_vmstat_idx(s), + mod_objcg_state(objcg, slab_pgdat(slab), cache_vmstat_idx(s), -obj_full_size(s)); obj_cgroup_put(objcg); } } #else /* CONFIG_MEMCG_KMEM */ +static inline struct obj_cgroup **slab_objcgs(struct slab *slab) +{ + return NULL; +} + static inline struct mem_cgroup *memcg_from_slab_obj(void *ptr) { return NULL; } -static inline int memcg_alloc_page_obj_cgroups(struct page *page, +static inline int memcg_alloc_slab_cgroups(struct slab *slab, struct kmem_cache *s, gfp_t gfp, - bool new_page) + bool new_slab) { return 0; } -static inline void memcg_free_page_obj_cgroups(struct page *page) +static inline void memcg_free_slab_cgroups(struct slab *slab) { } @@ -587,7 +617,7 @@ static __always_inline void account_slab(struct slab *slab, int order, struct kmem_cache *s, gfp_t gfp) { if (memcg_kmem_enabled() && (s->flags & SLAB_ACCOUNT)) - memcg_alloc_page_obj_cgroups(slab_page(slab), s, gfp, true); + memcg_alloc_slab_cgroups(slab, s, gfp, true); mod_node_page_state(slab_pgdat(slab), cache_vmstat_idx(s), PAGE_SIZE << order); @@ -597,7 +627,7 @@ static __always_inline void unaccount_slab(struct slab *slab, int order, struct kmem_cache *s) { if (memcg_kmem_enabled()) - memcg_free_page_obj_cgroups(slab_page(slab)); + memcg_free_slab_cgroups(slab); mod_node_page_state(slab_pgdat(slab), cache_vmstat_idx(s), -(PAGE_SIZE << order));
page->memcg_data is used with MEMCG_DATA_OBJCGS flag only for slab pages so convert all the related infrastructure to struct slab. Also use struct folio instead of struct page when resolving object pointers. This is not just mechanistic changing of types and names. Now in mem_cgroup_from_obj() we use folio_test_slab() to decide if we interpret the folio as a real slab instead of a large kmalloc, instead of relying on MEMCG_DATA_OBJCGS bit that used to be checked in page_objcgs_check(). Similarly in memcg_slab_free_hook() where we can encounter kmalloc_large() pages (here the folio slab flag check is implied by virt_to_slab()). As a result, page_objcgs_check() can be dropped instead of converted. To avoid include cycles, move the inline definition of slab_objcgs() from memcontrol.h to mm/slab.h. Signed-off-by: Vlastimil Babka <vbabka@suse.cz> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: Michal Hocko <mhocko@kernel.org> Cc: Vladimir Davydov <vdavydov.dev@gmail.com> Cc: <cgroups@vger.kernel.org> --- include/linux/memcontrol.h | 48 ------------------------ mm/memcontrol.c | 47 ++++++++++++----------- mm/slab.h | 76 ++++++++++++++++++++++++++------------ 3 files changed, 79 insertions(+), 92 deletions(-)