diff mbox series

[V2,1/2] mm/slub: Add panic function when slub leaks

Message ID 20240925132505.21278-2-fangzheng.zhang@unisoc.com (mailing list archive)
State New
Headers show
Series Introduce panic function when slub leaks | expand

Commit Message

Fangzheng Zhang Sept. 25, 2024, 1:25 p.m. UTC
Perform real-time memory usage monitoring on the slub page
allocation paths, ie, kmalloc_large_alloced and alloc_slab_page.
When the usage exceeds the set threshold value, the panic function
will be triggered.

Signed-off-by: Fangzheng Zhang <fangzheng.zhang@unisoc.com>
---
 mm/Kconfig | 11 ++++++++
 mm/slub.c  | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 86 insertions(+)

Comments

Greg KH Sept. 26, 2024, 8:26 a.m. UTC | #1
On Wed, Sep 25, 2024 at 09:25:04PM +0800, Fangzheng Zhang wrote:
> Perform real-time memory usage monitoring on the slub page
> allocation paths, ie, kmalloc_large_alloced and alloc_slab_page.
> When the usage exceeds the set threshold value, the panic function
> will be triggered.
> 
> Signed-off-by: Fangzheng Zhang <fangzheng.zhang@unisoc.com>
> ---
>  mm/Kconfig | 11 ++++++++
>  mm/slub.c  | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 86 insertions(+)
> 
> diff --git a/mm/Kconfig b/mm/Kconfig
> index 09aebca1cae3..60cf72d4f0da 100644
> --- a/mm/Kconfig
> +++ b/mm/Kconfig
> @@ -255,6 +255,17 @@ config SLUB_TINY
>  
>  	   If unsure, say N.
>  
> +config SLUB_LEAK_PANIC
> +	bool "Trigger panic when slub leaks"
> +	default y

Again, only use "default y" if you can not boot a machine without your
option enabled.  That is not the case here so please remove this line.

> +	help
> +	  Detect slub leaks by monitoring its usage in real time on the page
> +	  allocation path of the slub. When the slub occupancy exceeds the
> +	  user-set value, it is considered that the slub is leaking at this
> 	  time, and a panic operation will be triggered immediately. Users
> +	  can enable and set leak threshold by using the kernel command line
> +	  parameters "slub.leak_panic" and "slub.leak_panic_threshold".
> +
>  config SLAB_MERGE_DEFAULT
>  	bool "Allow slab caches to be merged"
>  	default y
> diff --git a/mm/slub.c b/mm/slub.c
> index 21f71cb6cc06..a0b514626de1 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -42,6 +42,7 @@
>  #include <kunit/test.h>
>  #include <kunit/test-bug.h>
>  #include <linux/sort.h>
> +#include <linux/vmstat.h>
>  
>  #include <linux/debugfs.h>
>  #include <trace/events/kmem.h>
> @@ -218,6 +219,15 @@ DEFINE_STATIC_KEY_FALSE(slub_debug_enabled);
>  #endif
>  #endif		/* CONFIG_SLUB_DEBUG */
>  
> +/* Internal slub_leak_panic definitions */
> +#ifdef CONFIG_SLUB_LEAK_PANIC
> +#define K(x) ((x) << (PAGE_SHIFT-10))

That is a very bad macro name, can you not use something more
descriptive?

> +static bool __read_mostly slub_leak_panic_enabled;
> +static unsigned int __read_mostly slub_leak_panic_threshold;
> +static long max_slab_count, temp_slab_count;
> +#endif

Also, again, please remove #ifdef from .c files as asked for before.

thanks,

greg k-h
diff mbox series

Patch

diff --git a/mm/Kconfig b/mm/Kconfig
index 09aebca1cae3..60cf72d4f0da 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -255,6 +255,17 @@  config SLUB_TINY
 
 	   If unsure, say N.
 
+config SLUB_LEAK_PANIC
+	bool "Trigger panic when slub leaks"
+	default y
+	help
+	  Detect slub leaks by monitoring its usage in real time on the page
+	  allocation path of the slub. When the slub occupancy exceeds the
+	  user-set value, it is considered that the slub is leaking at this
+	  time, and a panic operation will be triggered immediately. Users
+	  can enable and set leak threshold by using the kernel command line
+	  parameters "slub.leak_panic" and "slub.leak_panic_threshold".
+
 config SLAB_MERGE_DEFAULT
 	bool "Allow slab caches to be merged"
 	default y
diff --git a/mm/slub.c b/mm/slub.c
index 21f71cb6cc06..a0b514626de1 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -42,6 +42,7 @@ 
 #include <kunit/test.h>
 #include <kunit/test-bug.h>
 #include <linux/sort.h>
+#include <linux/vmstat.h>
 
 #include <linux/debugfs.h>
 #include <trace/events/kmem.h>
@@ -218,6 +219,15 @@  DEFINE_STATIC_KEY_FALSE(slub_debug_enabled);
 #endif
 #endif		/* CONFIG_SLUB_DEBUG */
 
+/* Internal slub_leak_panic definitions */
+#ifdef CONFIG_SLUB_LEAK_PANIC
+#define K(x) ((x) << (PAGE_SHIFT-10))
+static bool __read_mostly slub_leak_panic_enabled;
+static unsigned int __read_mostly slub_leak_panic_threshold;
+static long max_slab_count, temp_slab_count;
+#endif
+
+
 /* Structure holding parameters for get_partial() call chain */
 struct partial_context {
 	gfp_t flags;
@@ -2424,6 +2434,21 @@  static inline struct slab *alloc_slab_page(gfp_t flags, int node,
 	if (folio_is_pfmemalloc(folio))
 		slab_set_pfmemalloc(slab);
 
+#ifdef CONFIG_SLUB_LEAK_PANIC
+	if (likely(slub_leak_panic_enabled) && slub_leak_panic_threshold > 0) {
+		max_slab_count = K(totalram_pages()) * slub_leak_panic_threshold / 100;
+		temp_slab_count = K(global_node_page_state_pages(NR_SLAB_RECLAIMABLE_B))
+				+ K(global_node_page_state_pages(NR_SLAB_UNRECLAIMABLE_B))
+				+ K(1 << order);
+		if (temp_slab_count > max_slab_count)
+			panic("SLAB LEAK: %s(temp_count %6luKB > max_count %6luKB):\n"
+				"%s gfp_mask=%#x(%pGg), order=%d kB, oom_score_adj=%d\n",
+				__func__, temp_slab_count, max_slab_count,
+				current->comm, flags, &flags, order,
+				current->signal->oom_score_adj);
+	}
+#endif
+
 	return slab;
 }
 
@@ -4212,6 +4237,19 @@  static void *___kmalloc_large_node(size_t size, gfp_t flags, int node)
 		ptr = folio_address(folio);
 		lruvec_stat_mod_folio(folio, NR_SLAB_UNRECLAIMABLE_B,
 				      PAGE_SIZE << order);
+#ifdef CONFIG_SLUB_LEAK_PANIC
+		if (likely(slub_leak_panic_enabled) && slub_leak_panic_threshold > 0) {
+			max_slab_count = K(totalram_pages()) * slub_leak_panic_threshold / 100;
+			temp_slab_count = K(global_node_page_state_pages(NR_SLAB_RECLAIMABLE_B))
+					+ K(global_node_page_state_pages(NR_SLAB_UNRECLAIMABLE_B));
+			if (temp_slab_count > max_slab_count)
+				panic("SLAB LEAK: %s(temp_count %6luKB > max_count %6luKB):\n"
+					"%s gfp_mask=%#x(%pGg), order=%d kB, oom_score_adj=%d\n",
+					__func__, temp_slab_count, max_slab_count,
+					current->comm, flags, &flags, order,
+					current->signal->oom_score_adj);
+		}
+#endif
 	}
 
 	ptr = kasan_kmalloc_large(ptr, size, flags);
@@ -7443,3 +7481,40 @@  void get_slabinfo(struct kmem_cache *s, struct slabinfo *sinfo)
 	sinfo->cache_order = oo_order(s->oo);
 }
 #endif /* CONFIG_SLUB_DEBUG */
+
+/*
+ * The /sys/module/slub ABI
+ */
+#ifdef CONFIG_SLUB_LEAK_PANIC
+/*
+ * What:          /sys/module/slub/parameters/leak_panic
+ *                /sys/module/slub/parameters/leak_panic_threshold
+ * Date:          Sep 2024
+ * KernelVersion: v6.6+
+ * Description:   Used for slub memory leak check. When the user
+ *                successfully allocates the slub page, it also performs
+ *                statistics on the total slub usage in the system.
+ *                When the usage exceeds the set value
+ *                (threshold * memtotal / 100), it is considered that
+ *                there is a risk of slub leakage in the system at this time.
+ *                A panic operation will be triggered.
+ * Users:         userspace
+ */
+MODULE_PARM_DESC(leak_panic, "Disable/Enable slub_leak_panic");
+module_param_named(leak_panic, slub_leak_panic_enabled, bool, 0644);
+
+static int slub_leak_panic_threshold_set(const char *val, const struct kernel_param *kp)
+{
+	return param_set_uint_minmax(val, kp, 0, 100);
+}
+
+static const struct kernel_param_ops slub_leak_panic_threshold_ops = {
+	.set = slub_leak_panic_threshold_set,
+	.get = param_get_uint,
+};
+
+MODULE_PARM_DESC(leak_panic_threshold,
+		"Upper limit value of slub, expressed as a percentage of memtotal (0 ~ 100)");
+module_param_cb(leak_panic_threshold,
+		&slub_leak_panic_threshold_ops, &slub_leak_panic_threshold, 0644);
+#endif /* CONFIG_SLUB_LEAK_PANIC */