| Message ID | 20240212213922.783301-15-surenb@google.com (mailing list archive) |
| --- | --- |
| State | New |
| Series | Memory allocation profiling |
On 2/12/24 22:39, Suren Baghdasaryan wrote:
> Introduce helper functions to easily instrument page allocators by
> storing a pointer to the allocation tag associated with the code that
> allocated the page in a page_ext field.
>
> Signed-off-by: Suren Baghdasaryan <surenb@google.com>
> Co-developed-by: Kent Overstreet <kent.overstreet@linux.dev>
> Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
> +
> +#ifdef CONFIG_MEM_ALLOC_PROFILING
> +
> +#include <linux/page_ext.h>
> +
> +extern struct page_ext_operations page_alloc_tagging_ops;
> +extern struct page_ext *page_ext_get(struct page *page);
> +extern void page_ext_put(struct page_ext *page_ext);
> +
> +static inline union codetag_ref *codetag_ref_from_page_ext(struct page_ext *page_ext)
> +{
> +        return (void *)page_ext + page_alloc_tagging_ops.offset;
> +}
> +
> +static inline struct page_ext *page_ext_from_codetag_ref(union codetag_ref *ref)
> +{
> +        return (void *)ref - page_alloc_tagging_ops.offset;
> +}
> +
> +static inline union codetag_ref *get_page_tag_ref(struct page *page)
> +{
> +        if (page && mem_alloc_profiling_enabled()) {
> +                struct page_ext *page_ext = page_ext_get(page);
> +
> +                if (page_ext)
> +                        return codetag_ref_from_page_ext(page_ext);

I think when structured like this, you're not getting the full benefits of
static keys, and the compiler probably can't improve that on its own.

- page is tested before the static branch is evaluated
- when disabled, the result is NULL, and that's again tested in the callers

> +        }
> +        return NULL;
> +}
> +
> +static inline void put_page_tag_ref(union codetag_ref *ref)
> +{
> +        page_ext_put(page_ext_from_codetag_ref(ref));
> +}
> +
> +static inline void pgalloc_tag_add(struct page *page, struct task_struct *task,
> +                                   unsigned int order)
> +{
> +        union codetag_ref *ref = get_page_tag_ref(page);

So the more optimal way would be to test mem_alloc_profiling_enabled() here
as the very first thing, before trying to get the ref.

> +        if (ref) {
> +                alloc_tag_add(ref, task->alloc_tag, PAGE_SIZE << order);
> +                put_page_tag_ref(ref);
> +        }
> +}
> +
> +static inline void pgalloc_tag_sub(struct page *page, unsigned int order)
> +{
> +        union codetag_ref *ref = get_page_tag_ref(page);

And same here.

> +        if (ref) {
> +                alloc_tag_sub(ref, PAGE_SIZE << order);
> +                put_page_tag_ref(ref);
> +        }
> +}
> +
> +#else /* CONFIG_MEM_ALLOC_PROFILING */
> +
> +static inline void pgalloc_tag_add(struct page *page, struct task_struct *task,
> +                                   unsigned int order) {}
> +static inline void pgalloc_tag_sub(struct page *page, unsigned int order) {}
> +
> +#endif /* CONFIG_MEM_ALLOC_PROFILING */
> +
> +#endif /* _LINUX_PGALLOC_TAG_H */
> diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
> index 78d258ca508f..7bbdb0ddb011 100644
> --- a/lib/Kconfig.debug
> +++ b/lib/Kconfig.debug
> @@ -978,6 +978,7 @@ config MEM_ALLOC_PROFILING
>          depends on PROC_FS
>          depends on !DEBUG_FORCE_WEAK_PER_CPU
>          select CODE_TAGGING
> +        select PAGE_EXTENSION
>          help
>            Track allocation source code and record total allocation size
>            initiated at that code location. The mechanism can be used to track
> diff --git a/lib/alloc_tag.c b/lib/alloc_tag.c
> index 4fc031f9cefd..2d5226d9262d 100644
> --- a/lib/alloc_tag.c
> +++ b/lib/alloc_tag.c
> @@ -3,6 +3,7 @@
>  #include <linux/fs.h>
>  #include <linux/gfp.h>
>  #include <linux/module.h>
> +#include <linux/page_ext.h>
>  #include <linux/proc_fs.h>
>  #include <linux/seq_buf.h>
>  #include <linux/seq_file.h>
> @@ -124,6 +125,22 @@ static bool alloc_tag_module_unload(struct codetag_type *cttype,
>          return module_unused;
>  }
>
> +static __init bool need_page_alloc_tagging(void)
> +{
> +        return true;

So this means the page_ext memory overhead is paid unconditionally once
MEM_ALLOC_PROFILING is compile-time enabled, even if it is never enabled at
runtime? That makes it rather costly for generic distro kernels, where the
code could be compile-time enabled and runtime enabling suggested in a
debugging/support scenario. It's what we do with page_owner,
debug_pagealloc, slub_debug etc.

Ideally we'd have some vmalloc-based page_ext flavor for later-than-boot
runtime enablement, as we now have for stackdepot. But that could be
explored later. For now it would be sufficient to add an early_param boot
parameter to control the enablement including page_ext, like page_owner and
other features do.

> +}
> +
> +static __init void init_page_alloc_tagging(void)
> +{
> +}
> +
> +struct page_ext_operations page_alloc_tagging_ops = {
> +        .size = sizeof(union codetag_ref),
> +        .need = need_page_alloc_tagging,
> +        .init = init_page_alloc_tagging,
> +};
> +EXPORT_SYMBOL(page_alloc_tagging_ops);
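Concretely, the reordering suggested above would look something like the
sketch below: the static-key test moves out of get_page_tag_ref() and
becomes the very first check in the callers, so when profiling is
runtime-disabled the whole body is skipped via the patched branch. This is
an illustrative sketch that assumes mem_alloc_profiling_enabled() expands
to a static_branch test; it is not the code as merged.

/* Sketch only: mem_alloc_profiling_enabled() is assumed to wrap a static key. */
static inline union codetag_ref *get_page_tag_ref(struct page *page)
{
        /* No enabled check here; callers already tested the static branch. */
        if (page) {
                struct page_ext *page_ext = page_ext_get(page);

                if (page_ext)
                        return codetag_ref_from_page_ext(page_ext);
        }
        return NULL;
}

static inline void pgalloc_tag_add(struct page *page, struct task_struct *task,
                                   unsigned int order)
{
        /* Evaluate the static branch first: in the disabled case this is a
         * single patched jump and nothing below is executed. */
        if (mem_alloc_profiling_enabled()) {
                union codetag_ref *ref = get_page_tag_ref(page);

                if (ref) {
                        alloc_tag_add(ref, task->alloc_tag, PAGE_SIZE << order);
                        put_page_tag_ref(ref);
                }
        }
}

pgalloc_tag_sub() would be restructured the same way; get_page_tag_ref()
then only needs to validate the page and its page_ext.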
On Fri, Feb 16, 2024 at 1:45 AM Vlastimil Babka <vbabka@suse.cz> wrote:
>
> On 2/12/24 22:39, Suren Baghdasaryan wrote:
> > Introduce helper functions to easily instrument page allocators by
> > storing a pointer to the allocation tag associated with the code that
> > allocated the page in a page_ext field.
> >
> > Signed-off-by: Suren Baghdasaryan <surenb@google.com>
> > Co-developed-by: Kent Overstreet <kent.overstreet@linux.dev>
> > Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
> > +
> > +#ifdef CONFIG_MEM_ALLOC_PROFILING
> > +
> > +#include <linux/page_ext.h>
> > +
> > +extern struct page_ext_operations page_alloc_tagging_ops;
> > +extern struct page_ext *page_ext_get(struct page *page);
> > +extern void page_ext_put(struct page_ext *page_ext);
> > +
> > +static inline union codetag_ref *codetag_ref_from_page_ext(struct page_ext *page_ext)
> > +{
> > +        return (void *)page_ext + page_alloc_tagging_ops.offset;
> > +}
> > +
> > +static inline struct page_ext *page_ext_from_codetag_ref(union codetag_ref *ref)
> > +{
> > +        return (void *)ref - page_alloc_tagging_ops.offset;
> > +}
> > +
> > +static inline union codetag_ref *get_page_tag_ref(struct page *page)
> > +{
> > +        if (page && mem_alloc_profiling_enabled()) {
> > +                struct page_ext *page_ext = page_ext_get(page);
> > +
> > +                if (page_ext)
> > +                        return codetag_ref_from_page_ext(page_ext);
>
> I think when structured like this, you're not getting the full benefits of
> static keys, and the compiler probably can't improve that on its own.
>
> - page is tested before the static branch is evaluated
> - when disabled, the result is NULL, and that's again tested in the callers

Yes, that sounds right. I'll move the static branch check earlier like you
suggested. Thanks!

>
> > +        }
> > +        return NULL;
> > +}
> > +
> > +static inline void put_page_tag_ref(union codetag_ref *ref)
> > +{
> > +        page_ext_put(page_ext_from_codetag_ref(ref));
> > +}
> > +
> > +static inline void pgalloc_tag_add(struct page *page, struct task_struct *task,
> > +                                   unsigned int order)
> > +{
> > +        union codetag_ref *ref = get_page_tag_ref(page);
>
> So the more optimal way would be to test mem_alloc_profiling_enabled() here
> as the very first thing, before trying to get the ref.
>
> > +        if (ref) {
> > +                alloc_tag_add(ref, task->alloc_tag, PAGE_SIZE << order);
> > +                put_page_tag_ref(ref);
> > +        }
> > +}
> > +
> > +static inline void pgalloc_tag_sub(struct page *page, unsigned int order)
> > +{
> > +        union codetag_ref *ref = get_page_tag_ref(page);
>
> And same here.
>
> > +        if (ref) {
> > +                alloc_tag_sub(ref, PAGE_SIZE << order);
> > +                put_page_tag_ref(ref);
> > +        }
> > +}
> > +
> > +#else /* CONFIG_MEM_ALLOC_PROFILING */
> > +
> > +static inline void pgalloc_tag_add(struct page *page, struct task_struct *task,
> > +                                   unsigned int order) {}
> > +static inline void pgalloc_tag_sub(struct page *page, unsigned int order) {}
> > +
> > +#endif /* CONFIG_MEM_ALLOC_PROFILING */
> > +
> > +#endif /* _LINUX_PGALLOC_TAG_H */
> > diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
> > index 78d258ca508f..7bbdb0ddb011 100644
> > --- a/lib/Kconfig.debug
> > +++ b/lib/Kconfig.debug
> > @@ -978,6 +978,7 @@ config MEM_ALLOC_PROFILING
> >          depends on PROC_FS
> >          depends on !DEBUG_FORCE_WEAK_PER_CPU
> >          select CODE_TAGGING
> > +        select PAGE_EXTENSION
> >          help
> >            Track allocation source code and record total allocation size
> >            initiated at that code location. The mechanism can be used to track
> > diff --git a/lib/alloc_tag.c b/lib/alloc_tag.c
> > index 4fc031f9cefd..2d5226d9262d 100644
> > --- a/lib/alloc_tag.c
> > +++ b/lib/alloc_tag.c
> > @@ -3,6 +3,7 @@
> >  #include <linux/fs.h>
> >  #include <linux/gfp.h>
> >  #include <linux/module.h>
> > +#include <linux/page_ext.h>
> >  #include <linux/proc_fs.h>
> >  #include <linux/seq_buf.h>
> >  #include <linux/seq_file.h>
> > @@ -124,6 +125,22 @@ static bool alloc_tag_module_unload(struct codetag_type *cttype,
> >          return module_unused;
> >  }
> >
> > +static __init bool need_page_alloc_tagging(void)
> > +{
> > +        return true;
>
> So this means the page_ext memory overhead is paid unconditionally once
> MEM_ALLOC_PROFILING is compile-time enabled, even if it is never enabled at
> runtime? That makes it rather costly for generic distro kernels, where the
> code could be compile-time enabled and runtime enabling suggested in a
> debugging/support scenario. It's what we do with page_owner,
> debug_pagealloc, slub_debug etc.
>
> Ideally we'd have some vmalloc-based page_ext flavor for later-than-boot
> runtime enablement, as we now have for stackdepot. But that could be
> explored later. For now it would be sufficient to add an early_param boot
> parameter to control the enablement including page_ext, like page_owner and
> other features do.

Sounds reasonable. In v1 of this patchset we used an early boot parameter,
but after the LSF/MM discussion that was changed to runtime controls. Sounds
like we would need both here; that should be easy to add.

Allocating/reclaiming the space for page_ext, slab_ext, etc. dynamically is
not trivial and, if done, would be done separately. I looked into it before
and listed the encountered issues in the cover letter of v2 [1]; see the
"things we could not address" section.

[1] https://lore.kernel.org/all/20231024134637.3120277-1-surenb@google.com/

>
> > +}
> > +
> > +static __init void init_page_alloc_tagging(void)
> > +{
> > +}
> > +
> > +struct page_ext_operations page_alloc_tagging_ops = {
> > +        .size = sizeof(union codetag_ref),
> > +        .need = need_page_alloc_tagging,
> > +        .init = init_page_alloc_tagging,
> > +};
> > +EXPORT_SYMBOL(page_alloc_tagging_ops);
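The boot-time gating agreed on above could look roughly like the sketch
below, modeled on page_owner's early_param handling. The parameter name
"mem_profiling" and the variable names here are illustrative assumptions,
not necessarily what the series ended up merging:

/* Sketch modeled on page_owner; names are illustrative assumptions. */
#include <linux/init.h>
#include <linux/kstrtox.h>

static bool mem_profiling_boot_enabled __initdata;

static int __init early_mem_profiling_param(char *buf)
{
        /* Accept "mem_profiling=0/1/on/off" on the kernel command line. */
        return kstrtobool(buf, &mem_profiling_boot_enabled);
}
early_param("mem_profiling", early_mem_profiling_param);

static __init bool need_page_alloc_tagging(void)
{
        /* Only reserve page_ext space when requested at boot. */
        return mem_profiling_boot_enabled;
}

With such a parameter, need_page_alloc_tagging() reserves page_ext space
only when profiling was requested on the command line, while the existing
runtime control (the static key behind mem_alloc_profiling_enabled()) can
still toggle the accounting on and off afterwards.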
diff --git a/include/linux/page_ext.h b/include/linux/page_ext.h
index be98564191e6..07e0656898f9 100644
--- a/include/linux/page_ext.h
+++ b/include/linux/page_ext.h
@@ -4,7 +4,6 @@
 
 #include <linux/types.h>
 #include <linux/stacktrace.h>
-#include <linux/stackdepot.h>
 
 struct pglist_data;
 
diff --git a/include/linux/pgalloc_tag.h b/include/linux/pgalloc_tag.h
new file mode 100644
index 000000000000..a060c26eb449
--- /dev/null
+++ b/include/linux/pgalloc_tag.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * page allocation tagging
+ */
+#ifndef _LINUX_PGALLOC_TAG_H
+#define _LINUX_PGALLOC_TAG_H
+
+#include <linux/alloc_tag.h>
+
+#ifdef CONFIG_MEM_ALLOC_PROFILING
+
+#include <linux/page_ext.h>
+
+extern struct page_ext_operations page_alloc_tagging_ops;
+extern struct page_ext *page_ext_get(struct page *page);
+extern void page_ext_put(struct page_ext *page_ext);
+
+static inline union codetag_ref *codetag_ref_from_page_ext(struct page_ext *page_ext)
+{
+        return (void *)page_ext + page_alloc_tagging_ops.offset;
+}
+
+static inline struct page_ext *page_ext_from_codetag_ref(union codetag_ref *ref)
+{
+        return (void *)ref - page_alloc_tagging_ops.offset;
+}
+
+static inline union codetag_ref *get_page_tag_ref(struct page *page)
+{
+        if (page && mem_alloc_profiling_enabled()) {
+                struct page_ext *page_ext = page_ext_get(page);
+
+                if (page_ext)
+                        return codetag_ref_from_page_ext(page_ext);
+        }
+        return NULL;
+}
+
+static inline void put_page_tag_ref(union codetag_ref *ref)
+{
+        page_ext_put(page_ext_from_codetag_ref(ref));
+}
+
+static inline void pgalloc_tag_add(struct page *page, struct task_struct *task,
+                                   unsigned int order)
+{
+        union codetag_ref *ref = get_page_tag_ref(page);
+
+        if (ref) {
+                alloc_tag_add(ref, task->alloc_tag, PAGE_SIZE << order);
+                put_page_tag_ref(ref);
+        }
+}
+
+static inline void pgalloc_tag_sub(struct page *page, unsigned int order)
+{
+        union codetag_ref *ref = get_page_tag_ref(page);
+
+        if (ref) {
+                alloc_tag_sub(ref, PAGE_SIZE << order);
+                put_page_tag_ref(ref);
+        }
+}
+
+#else /* CONFIG_MEM_ALLOC_PROFILING */
+
+static inline void pgalloc_tag_add(struct page *page, struct task_struct *task,
+                                   unsigned int order) {}
+static inline void pgalloc_tag_sub(struct page *page, unsigned int order) {}
+
+#endif /* CONFIG_MEM_ALLOC_PROFILING */
+
+#endif /* _LINUX_PGALLOC_TAG_H */
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 78d258ca508f..7bbdb0ddb011 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -978,6 +978,7 @@ config MEM_ALLOC_PROFILING
         depends on PROC_FS
         depends on !DEBUG_FORCE_WEAK_PER_CPU
         select CODE_TAGGING
+        select PAGE_EXTENSION
         help
           Track allocation source code and record total allocation size
           initiated at that code location. The mechanism can be used to track
diff --git a/lib/alloc_tag.c b/lib/alloc_tag.c
index 4fc031f9cefd..2d5226d9262d 100644
--- a/lib/alloc_tag.c
+++ b/lib/alloc_tag.c
@@ -3,6 +3,7 @@
 #include <linux/fs.h>
 #include <linux/gfp.h>
 #include <linux/module.h>
+#include <linux/page_ext.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_buf.h>
 #include <linux/seq_file.h>
@@ -124,6 +125,22 @@ static bool alloc_tag_module_unload(struct codetag_type *cttype,
         return module_unused;
 }
 
+static __init bool need_page_alloc_tagging(void)
+{
+        return true;
+}
+
+static __init void init_page_alloc_tagging(void)
+{
+}
+
+struct page_ext_operations page_alloc_tagging_ops = {
+        .size = sizeof(union codetag_ref),
+        .need = need_page_alloc_tagging,
+        .init = init_page_alloc_tagging,
+};
+EXPORT_SYMBOL(page_alloc_tagging_ops);
+
 static struct ctl_table memory_allocation_profiling_sysctls[] = {
         {
                 .procname = "mem_profiling",
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 2c19f5515e36..e9ea2919d02d 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -24,6 +24,7 @@
 #include <linux/page_ext.h>
 #include <linux/pti.h>
 #include <linux/pgtable.h>
+#include <linux/stackdepot.h>
 #include <linux/swap.h>
 #include <linux/cma.h>
 #include <linux/crash_dump.h>
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 150d4f23b010..edb79a55a252 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -53,6 +53,7 @@
 #include <linux/khugepaged.h>
 #include <linux/delayacct.h>
 #include <linux/cacheinfo.h>
+#include <linux/pgalloc_tag.h>
 #include <asm/div64.h>
 #include "internal.h"
 #include "shuffle.h"
@@ -1100,6 +1101,7 @@ static __always_inline bool free_pages_prepare(struct page *page,
                 /* Do not let hwpoison pages hit pcplists/buddy */
                 reset_page_owner(page, order);
                 page_table_check_free(page, order);
+                pgalloc_tag_sub(page, order);
                 return false;
         }
 
@@ -1139,6 +1141,7 @@ static __always_inline bool free_pages_prepare(struct page *page,
         page->flags &= ~PAGE_FLAGS_CHECK_AT_PREP;
         reset_page_owner(page, order);
         page_table_check_free(page, order);
+        pgalloc_tag_sub(page, order);
 
         if (!PageHighMem(page)) {
                 debug_check_no_locks_freed(page_address(page),
@@ -1532,6 +1535,7 @@ inline void post_alloc_hook(struct page *page, unsigned int order,
 
         set_page_owner(page, order, gfp_flags);
         page_table_check_alloc(page, order);
+        pgalloc_tag_add(page, current, order);
 }
 
 static void prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags,
diff --git a/mm/page_ext.c b/mm/page_ext.c
index 4548fcc66d74..3c58fe8a24df 100644
--- a/mm/page_ext.c
+++ b/mm/page_ext.c
@@ -10,6 +10,7 @@
 #include <linux/page_idle.h>
 #include <linux/page_table_check.h>
 #include <linux/rcupdate.h>
+#include <linux/pgalloc_tag.h>
 
 /*
  * struct page extension
@@ -82,6 +83,9 @@ static struct page_ext_operations *page_ext_ops[] __initdata = {
 #if defined(CONFIG_PAGE_IDLE_FLAG) && !defined(CONFIG_64BIT)
         &page_idle_ops,
 #endif
+#ifdef CONFIG_MEM_ALLOC_PROFILING
+        &page_alloc_tagging_ops,
+#endif
 #ifdef CONFIG_PAGE_TABLE_CHECK
         &page_table_check_ops,
 #endif
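As background for the offset arithmetic in codetag_ref_from_page_ext()
above: page_ext gives every registered client a slice of one per-page
extension blob and records where that slice starts in ops->offset. A
simplified illustration of that layout pass (not the exact mm/page_ext.c
code) might look like this:

/* Simplified sketch of how mm/page_ext.c assigns per-client offsets. */
static unsigned long page_ext_size = sizeof(struct page_ext);

static bool __init invoke_need_callbacks(void)
{
        bool need = false;
        int i;

        for (i = 0; i < ARRAY_SIZE(page_ext_ops); i++) {
                if (page_ext_ops[i]->need()) {
                        /* Client data lives at this offset past struct page_ext. */
                        page_ext_ops[i]->offset = page_ext_size;
                        page_ext_size += page_ext_ops[i]->size;
                        need = true;
                }
        }
        return need;
}

This is why codetag_ref_from_page_ext() can recover the union codetag_ref
for a page by adding page_alloc_tagging_ops.offset to the page_ext pointer,
and why page_ext_from_codetag_ref() can invert the mapping by subtracting it.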