[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <Z8BRt9a6PqDqBVsx@harry>
Date: Thu, 27 Feb 2025 20:51:19 +0900
From: Harry Yoo <harry.yoo@...cle.com>
To: Hyesoo Yu <hyesoo.yu@...sung.com>
Cc: janghyuck.kim@...sung.com, vbabka@...e.cz,
Christoph Lameter <cl@...ux.com>, Pekka Enberg <penberg@...nel.org>,
David Rientjes <rientjes@...gle.com>,
Joonsoo Kim <iamjoonsoo.kim@....com>,
Andrew Morton <akpm@...ux-foundation.org>,
Roman Gushchin <roman.gushchin@...ux.dev>,
Hyeonggon Yoo <42.hyeyoo@...il.com>, linux-mm@...ck.org,
linux-kernel@...r.kernel.org
Subject: Re: [PATCH v4 1/2] mm: slub: Print the broken data before restoring
slub.
On Wed, Feb 26, 2025 at 05:12:00PM +0900, Hyesoo Yu wrote:
> Previously, the restore occurred after printing the object in slub.
> After commit 47d911b02cbe ("slab: make check_object() more consistent"),
> the bytes are printed after the restore. This information about the bytes
> before the restore is highly valuable for debugging purposes.
> For instance, in the event of a cache issue, it displays byte patterns
> by breaking them down into 64-byte units. Without this information,
> we can only speculate on how it was broken. Hence the corrupted regions
> should be printed prior to the restoration process. However if an object
> breaks in multiple places, the same log may be output multiple times.
> Therefore the slub log is reported only once to prevent redundant printing,
> by sending a parameter indicating whether an error has occurred previously.
>
> Changes in v4:
> - Change the print format to include specific error names.
>
> Changes in v3:
> - Change the parameter type of check_bytes_and_report.
>
> Changes in v2:
> - Instead of using print_section every time on check_bytes_and_report,
> just print it once for the entire slub object before the restore.
IMHO it is not a good practice to include patch version changes
in the changelog, because the changelog should make sense on its own.
> Signed-off-by: Hyesoo Yu <hyesoo.yu@...sung.com>
> ---
I think that's why people usually place the patch version log just below the '---' line.
(More details can be found in the "Submitting patches" documentation
https://docs.kernel.org/process/submitting-patches.html#commentary)
Anyway, the code itself looks good to me (with a nit below).
Please feel free to add:
Reviewed-by: Harry Yoo <harry.yoo@...cle.com>
> mm/slub.c | 32 ++++++++++++++------------------
> 1 file changed, 14 insertions(+), 18 deletions(-)
>
> diff --git a/mm/slub.c b/mm/slub.c
> index b3969d63cc04..8c13cd43c0fd 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -1192,8 +1192,8 @@ static void restore_bytes(struct kmem_cache *s, char *message, u8 data,
>
> static pad_check_attributes int
> check_bytes_and_report(struct kmem_cache *s, struct slab *slab,
> - u8 *object, char *what,
> - u8 *start, unsigned int value, unsigned int bytes)
> + u8 *object, char *what, u8 *start, unsigned int value,
> + unsigned int bytes, bool slab_obj_print)
> {
> u8 *fault;
> u8 *end;
> @@ -1212,10 +1212,11 @@ check_bytes_and_report(struct kmem_cache *s, struct slab *slab,
> if (slab_add_kunit_errors())
> goto skip_bug_print;
>
> - slab_bug(s, "%s overwritten", what);
> - pr_err("0x%p-0x%p @offset=%tu. First byte 0x%x instead of 0x%x\n",
> - fault, end - 1, fault - addr,
> - fault[0], value);
> + pr_err("[%s overwritten] 0x%p-0x%p @offset=%tu. First byte 0x%x instead of 0x%x\n",
> + what, fault, end - 1, fault - addr, fault[0], value);
> +
> + if (slab_obj_print)
> + object_err(s, slab, object, "Object corrupt");
>
> skip_bug_print:
> restore_bytes(s, what, value, fault, end);
> @@ -1279,7 +1280,7 @@ static int check_pad_bytes(struct kmem_cache *s, struct slab *slab, u8 *p)
> return 1;
>
> return check_bytes_and_report(s, slab, p, "Object padding",
> - p + off, POISON_INUSE, size_from_object(s) - off);
> + p + off, POISON_INUSE, size_from_object(s) - off, true);
> }
>
> /* Check the pad bytes at the end of a slab page */
> @@ -1329,11 +1330,11 @@ static int check_object(struct kmem_cache *s, struct slab *slab,
>
> if (s->flags & SLAB_RED_ZONE) {
> if (!check_bytes_and_report(s, slab, object, "Left Redzone",
> - object - s->red_left_pad, val, s->red_left_pad))
> + object - s->red_left_pad, val, s->red_left_pad, ret))
> ret = 0;
>
> if (!check_bytes_and_report(s, slab, object, "Right Redzone",
> - endobject, val, s->inuse - s->object_size))
> + endobject, val, s->inuse - s->object_size, ret))
> ret = 0;
>
> if (slub_debug_orig_size(s) && val == SLUB_RED_ACTIVE) {
> @@ -1342,7 +1343,7 @@ static int check_object(struct kmem_cache *s, struct slab *slab,
> if (s->object_size > orig_size &&
> !check_bytes_and_report(s, slab, object,
> "kmalloc Redzone", p + orig_size,
> - val, s->object_size - orig_size)) {
> + val, s->object_size - orig_size, ret)) {
> ret = 0;
> }
> }
> @@ -1350,7 +1351,7 @@ static int check_object(struct kmem_cache *s, struct slab *slab,
> if ((s->flags & SLAB_POISON) && s->object_size < s->inuse) {
> if (!check_bytes_and_report(s, slab, p, "Alignment padding",
> endobject, POISON_INUSE,
> - s->inuse - s->object_size))
> + s->inuse - s->object_size, ret))
> ret = 0;
> }
> }
> @@ -1366,11 +1367,11 @@ static int check_object(struct kmem_cache *s, struct slab *slab,
> if (kasan_meta_size < s->object_size - 1 &&
> !check_bytes_and_report(s, slab, p, "Poison",
> p + kasan_meta_size, POISON_FREE,
> - s->object_size - kasan_meta_size - 1))
> + s->object_size - kasan_meta_size - 1, ret))
> ret = 0;
> if (kasan_meta_size < s->object_size &&
> !check_bytes_and_report(s, slab, p, "End Poison",
> - p + s->object_size - 1, POISON_END, 1))
> + p + s->object_size - 1, POISON_END, 1, ret))
> ret = 0;
> }
> /*
> @@ -1396,11 +1397,6 @@ static int check_object(struct kmem_cache *s, struct slab *slab,
> ret = 0;
> }
>
> - if (!ret && !slab_in_kunit_test()) {
nit: check_object() was the only user of slab_in_kunit_test().
Can we remove it altogether?
--
Cheers,
Harry
> - print_trailer(s, slab, object);
> - add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
> - }
> -
> return ret;
> }
>
> --
> 2.28.0
>
Powered by blists - more mailing lists