diff mbox series

[v3,1/2] mm: slub: Print the broken data before restoring slub.

Message ID 20250220033953.1606820-2-hyesoo.yu@samsung.com (mailing list archive)
State New
Headers show
Series mm: slub: Enhanced debugging in slub error | expand

Commit Message

Hyesoo Yu Feb. 20, 2025, 3:39 a.m. UTC
Previously, the restore occured after printing the object in slub.
After commit 47d911b02cbe ("slab: make check_object() more consistent"),
the bytes are printed after the restore. This information about the bytes
before the restore is highly valuable for debugging purpose.
For instance, in a event of cache issue, it displays byte patterns
by breaking them down into 64-bytes units. Without this information,
we can only speculate on how it was broken. Hence the corrupted regions
should be printed prior to the restoration process. However if an object
breaks in multiple places, the same log may be output multiple times.
Therefore the slub log is reported only once to prevent redundant printing,
by sending a parameter indicating whether an error has occurred previously.

Changes in v3:
- Change the parameter type of check_bytes_and_report.

Changes in v2:
- Instead of using print_section every time on check_bytes_and_report,
just print it once for the entire slub object before the restore.

Signed-off-by: Hyesoo Yu <hyesoo.yu@samsung.com>
Change-Id: I73cf76c110eed62506643913517c957c05a29520
---
 mm/slub.c | 29 ++++++++++++++---------------
 1 file changed, 14 insertions(+), 15 deletions(-)

Comments

Harry Yoo Feb. 20, 2025, 11:01 a.m. UTC | #1
On Thu, Feb 20, 2025 at 12:39:43PM +0900, Hyesoo Yu wrote:
> Previously, the restore occured after printing the object in slub.
> After commit 47d911b02cbe ("slab: make check_object() more consistent"),
> the bytes are printed after the restore. This information about the bytes
> before the restore is highly valuable for debugging purpose.
> For instance, in a event of cache issue, it displays byte patterns
> by breaking them down into 64-bytes units. Without this information,
> we can only speculate on how it was broken. Hence the corrupted regions
> should be printed prior to the restoration process. However if an object
> breaks in multiple places, the same log may be output multiple times.
> Therefore the slub log is reported only once to prevent redundant printing,
> by sending a parameter indicating whether an error has occurred previously.
> 
> Changes in v3:
> - Change the parameter type of check_bytes_and_report.
> 
> Changes in v2:
> - Instead of using print_section every time on check_bytes_and_report,
> just print it once for the entire slub object before the restore.
>
> Signed-off-by: Hyesoo Yu <hyesoo.yu@samsung.com>
> Change-Id: I73cf76c110eed62506643913517c957c05a29520

As previously mentioned by others, Change-Id is not used in Linux
kernel development.

> ---
>  mm/slub.c | 29 ++++++++++++++---------------
>  1 file changed, 14 insertions(+), 15 deletions(-)
> 
> diff --git a/mm/slub.c b/mm/slub.c
> index b3969d63cc04..de62fed12236 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -1192,12 +1192,13 @@ static void restore_bytes(struct kmem_cache *s, char *message, u8 data,
>  
>  static pad_check_attributes int
>  check_bytes_and_report(struct kmem_cache *s, struct slab *slab,
> -		       u8 *object, char *what,
> -		       u8 *start, unsigned int value, unsigned int bytes)
> +		       u8 *object, char *what, u8 *start, unsigned int value,
> +		       unsigned int bytes, bool slab_obj_print)
>  {
>  	u8 *fault;
>  	u8 *end;
>  	u8 *addr = slab_address(slab);
> +	char buf[100];
>  
>  	metadata_access_enable();
>  	fault = memchr_inv(kasan_reset_tag(start), value, bytes);
> @@ -1212,11 +1213,14 @@ check_bytes_and_report(struct kmem_cache *s, struct slab *slab,
>  	if (slab_add_kunit_errors())
>  		goto skip_bug_print;
>  
> -	slab_bug(s, "%s overwritten", what);
>  	pr_err("0x%p-0x%p @offset=%tu. First byte 0x%x instead of 0x%x\n",
>  					fault, end - 1, fault - addr,
>  					fault[0], value);
>  
> +	scnprintf(buf, 100, "%s overwritten", what);

How about moving this into the if block and changing 100 to sizeof(buf)?

> +	if (slab_obj_print)
> +		object_err(s, slab, object, buf);
> +
>  skip_bug_print:
>  	restore_bytes(s, what, value, fault, end);
>  	return 0;
> @@ -1279,7 +1283,7 @@ static int check_pad_bytes(struct kmem_cache *s, struct slab *slab, u8 *p)
>  		return 1;
>  
>  	return check_bytes_and_report(s, slab, p, "Object padding",
> -			p + off, POISON_INUSE, size_from_object(s) - off);
> +			p + off, POISON_INUSE, size_from_object(s) - off, true);
>  }
>  
>  /* Check the pad bytes at the end of a slab page */
> @@ -1329,11 +1333,11 @@ static int check_object(struct kmem_cache *s, struct slab *slab,
>  
>  	if (s->flags & SLAB_RED_ZONE) {
>  		if (!check_bytes_and_report(s, slab, object, "Left Redzone",
> -			object - s->red_left_pad, val, s->red_left_pad))
> +			object - s->red_left_pad, val, s->red_left_pad, !!ret))

I think you don't have to add !! to ret.
Converting from int to _Bool is legal in C99 and it will work as intended. 

>  			ret = 0;
Hyesoo Yu Feb. 20, 2025, 9:49 p.m. UTC | #2
On Thu, Feb 20, 2025 at 08:01:57PM +0900, Harry Yoo wrote:
> On Thu, Feb 20, 2025 at 12:39:43PM +0900, Hyesoo Yu wrote:
> > Previously, the restore occured after printing the object in slub.
> > After commit 47d911b02cbe ("slab: make check_object() more consistent"),
> > the bytes are printed after the restore. This information about the bytes
> > before the restore is highly valuable for debugging purpose.
> > For instance, in a event of cache issue, it displays byte patterns
> > by breaking them down into 64-bytes units. Without this information,
> > we can only speculate on how it was broken. Hence the corrupted regions
> > should be printed prior to the restoration process. However if an object
> > breaks in multiple places, the same log may be output multiple times.
> > Therefore the slub log is reported only once to prevent redundant printing,
> > by sending a parameter indicating whether an error has occurred previously.
> > 
> > Changes in v3:
> > - Change the parameter type of check_bytes_and_report.
> > 
> > Changes in v2:
> > - Instead of using print_section every time on check_bytes_and_report,
> > just print it once for the entire slub object before the restore.
> >
> > Signed-off-by: Hyesoo Yu <hyesoo.yu@samsung.com>
> > Change-Id: I73cf76c110eed62506643913517c957c05a29520
> 
> As previously mentioned by others, Change-Id is not used in Linux
> kernel development.
>

Oops, It is my mistake. I will remove it.

> > ---
> >  mm/slub.c | 29 ++++++++++++++---------------
> >  1 file changed, 14 insertions(+), 15 deletions(-)
> > 
> > diff --git a/mm/slub.c b/mm/slub.c
> > index b3969d63cc04..de62fed12236 100644
> > --- a/mm/slub.c
> > +++ b/mm/slub.c
> > @@ -1192,12 +1192,13 @@ static void restore_bytes(struct kmem_cache *s, char *message, u8 data,
> >  
> >  static pad_check_attributes int
> >  check_bytes_and_report(struct kmem_cache *s, struct slab *slab,
> > -		       u8 *object, char *what,
> > -		       u8 *start, unsigned int value, unsigned int bytes)
> > +		       u8 *object, char *what, u8 *start, unsigned int value,
> > +		       unsigned int bytes, bool slab_obj_print)
> >  {
> >  	u8 *fault;
> >  	u8 *end;
> >  	u8 *addr = slab_address(slab);
> > +	char buf[100];
> >  
> >  	metadata_access_enable();
> >  	fault = memchr_inv(kasan_reset_tag(start), value, bytes);
> > @@ -1212,11 +1213,14 @@ check_bytes_and_report(struct kmem_cache *s, struct slab *slab,
> >  	if (slab_add_kunit_errors())
> >  		goto skip_bug_print;
> >  
> > -	slab_bug(s, "%s overwritten", what);
> >  	pr_err("0x%p-0x%p @offset=%tu. First byte 0x%x instead of 0x%x\n",
> >  					fault, end - 1, fault - addr,
> >  					fault[0], value);
> >  
> > +	scnprintf(buf, 100, "%s overwritten", what);
> 
> How about moving this into the if block and changing 100 to sizeof(buf)?
>

That sounds good. I will change it.

> > +	if (slab_obj_print)
> > +		object_err(s, slab, object, buf);
> > +
> >  skip_bug_print:
> >  	restore_bytes(s, what, value, fault, end);
> >  	return 0;
> > @@ -1279,7 +1283,7 @@ static int check_pad_bytes(struct kmem_cache *s, struct slab *slab, u8 *p)
> >  		return 1;
> >  
> >  	return check_bytes_and_report(s, slab, p, "Object padding",
> > -			p + off, POISON_INUSE, size_from_object(s) - off);
> > +			p + off, POISON_INUSE, size_from_object(s) - off, true);
> >  }
> >  
> >  /* Check the pad bytes at the end of a slab page */
> > @@ -1329,11 +1333,11 @@ static int check_object(struct kmem_cache *s, struct slab *slab,
> >  
> >  	if (s->flags & SLAB_RED_ZONE) {
> >  		if (!check_bytes_and_report(s, slab, object, "Left Redzone",
> > -			object - s->red_left_pad, val, s->red_left_pad))
> > +			object - s->red_left_pad, val, s->red_left_pad, !!ret))
> 
> I think you don't have to add !! to ret.
> Converting from int to _Bool is legal in C99 and it will work as intended. 
> 

Thank you for informing me.
I remove !! to next version.

Thanks,
Regards.

> > 			ret = 0;
> 
> -- 
> Cheers,
> Harry
>
Harry Yoo Feb. 21, 2025, 8:16 a.m. UTC | #3
On Thu, Feb 20, 2025 at 12:39:43PM +0900, Hyesoo Yu wrote:
> Previously, the restore occured after printing the object in slub.
> After commit 47d911b02cbe ("slab: make check_object() more consistent"),
> the bytes are printed after the restore. This information about the bytes
> before the restore is highly valuable for debugging purpose.
> For instance, in a event of cache issue, it displays byte patterns
> by breaking them down into 64-bytes units. Without this information,
> we can only speculate on how it was broken. Hence the corrupted regions
> should be printed prior to the restoration process. However if an object
> breaks in multiple places, the same log may be output multiple times.
> Therefore the slub log is reported only once to prevent redundant printing,
> by sending a parameter indicating whether an error has occurred previously.
> 
> Changes in v3:
> - Change the parameter type of check_bytes_and_report.
> 
> Changes in v2:
> - Instead of using print_section every time on check_bytes_and_report,
> just print it once for the entire slub object before the restore.
> 
> Signed-off-by: Hyesoo Yu <hyesoo.yu@samsung.com>
> Change-Id: I73cf76c110eed62506643913517c957c05a29520
> ---
>  mm/slub.c | 29 ++++++++++++++---------------
>  1 file changed, 14 insertions(+), 15 deletions(-)
> 

> @@ -1212,11 +1213,14 @@ check_bytes_and_report(struct kmem_cache *s, struct slab *slab,
>  	if (slab_add_kunit_errors())
>  		goto skip_bug_print;
>  
> -	slab_bug(s, "%s overwritten", what);
>  	pr_err("0x%p-0x%p @offset=%tu. First byte 0x%x instead of 0x%x\n",
>  					fault, end - 1, fault - addr,
>  					fault[0], value);
>  
> +	scnprintf(buf, 100, "%s overwritten", what);
> +	if (slab_obj_print)
> +		object_err(s, slab, object, buf);


Wait, I think it's better to keep printing "%s overwritten" regardless
of slab_obj_print and only call __slab_err() if slab_obj_print == true
as discussed here [1]? Becuase in case there are multiple errors,
users should know.

[1] https://lore.kernel.org/all/2ff52c5e-4b6b-4b3d-9047-f00967315d3e@suse.cz
diff mbox series

Patch

diff --git a/mm/slub.c b/mm/slub.c
index b3969d63cc04..de62fed12236 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1192,12 +1192,13 @@  static void restore_bytes(struct kmem_cache *s, char *message, u8 data,
 
 static pad_check_attributes int
 check_bytes_and_report(struct kmem_cache *s, struct slab *slab,
-		       u8 *object, char *what,
-		       u8 *start, unsigned int value, unsigned int bytes)
+		       u8 *object, char *what, u8 *start, unsigned int value,
+		       unsigned int bytes, bool slab_obj_print)
 {
 	u8 *fault;
 	u8 *end;
 	u8 *addr = slab_address(slab);
+	char buf[100];
 
 	metadata_access_enable();
 	fault = memchr_inv(kasan_reset_tag(start), value, bytes);
@@ -1212,11 +1213,14 @@  check_bytes_and_report(struct kmem_cache *s, struct slab *slab,
 	if (slab_add_kunit_errors())
 		goto skip_bug_print;
 
-	slab_bug(s, "%s overwritten", what);
 	pr_err("0x%p-0x%p @offset=%tu. First byte 0x%x instead of 0x%x\n",
 					fault, end - 1, fault - addr,
 					fault[0], value);
 
+	scnprintf(buf, 100, "%s overwritten", what);
+	if (slab_obj_print)
+		object_err(s, slab, object, buf);
+
 skip_bug_print:
 	restore_bytes(s, what, value, fault, end);
 	return 0;
@@ -1279,7 +1283,7 @@  static int check_pad_bytes(struct kmem_cache *s, struct slab *slab, u8 *p)
 		return 1;
 
 	return check_bytes_and_report(s, slab, p, "Object padding",
-			p + off, POISON_INUSE, size_from_object(s) - off);
+			p + off, POISON_INUSE, size_from_object(s) - off, true);
 }
 
 /* Check the pad bytes at the end of a slab page */
@@ -1329,11 +1333,11 @@  static int check_object(struct kmem_cache *s, struct slab *slab,
 
 	if (s->flags & SLAB_RED_ZONE) {
 		if (!check_bytes_and_report(s, slab, object, "Left Redzone",
-			object - s->red_left_pad, val, s->red_left_pad))
+			object - s->red_left_pad, val, s->red_left_pad, !!ret))
 			ret = 0;
 
 		if (!check_bytes_and_report(s, slab, object, "Right Redzone",
-			endobject, val, s->inuse - s->object_size))
+			endobject, val, s->inuse - s->object_size, !!ret))
 			ret = 0;
 
 		if (slub_debug_orig_size(s) && val == SLUB_RED_ACTIVE) {
@@ -1342,7 +1346,7 @@  static int check_object(struct kmem_cache *s, struct slab *slab,
 			if (s->object_size > orig_size  &&
 				!check_bytes_and_report(s, slab, object,
 					"kmalloc Redzone", p + orig_size,
-					val, s->object_size - orig_size)) {
+					val, s->object_size - orig_size, !!ret)) {
 				ret = 0;
 			}
 		}
@@ -1350,7 +1354,7 @@  static int check_object(struct kmem_cache *s, struct slab *slab,
 		if ((s->flags & SLAB_POISON) && s->object_size < s->inuse) {
 			if (!check_bytes_and_report(s, slab, p, "Alignment padding",
 				endobject, POISON_INUSE,
-				s->inuse - s->object_size))
+				s->inuse - s->object_size, !!ret))
 				ret = 0;
 		}
 	}
@@ -1366,11 +1370,11 @@  static int check_object(struct kmem_cache *s, struct slab *slab,
 			if (kasan_meta_size < s->object_size - 1 &&
 			    !check_bytes_and_report(s, slab, p, "Poison",
 					p + kasan_meta_size, POISON_FREE,
-					s->object_size - kasan_meta_size - 1))
+					s->object_size - kasan_meta_size - 1, !!ret))
 				ret = 0;
 			if (kasan_meta_size < s->object_size &&
 			    !check_bytes_and_report(s, slab, p, "End Poison",
-					p + s->object_size - 1, POISON_END, 1))
+					p + s->object_size - 1, POISON_END, 1, !!ret))
 				ret = 0;
 		}
 		/*
@@ -1396,11 +1400,6 @@  static int check_object(struct kmem_cache *s, struct slab *slab,
 		ret = 0;
 	}
 
-	if (!ret && !slab_in_kunit_test()) {
-		print_trailer(s, slab, object);
-		add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
-	}
-
 	return ret;
 }