Message-ID: <ZwTt-Sq5bsovQI5X@elver.google.com>
Date: Tue, 8 Oct 2024 10:31:53 +0200
From: Marco Elver <elver@...gle.com>
To: Sabyrzhan Tasbolatov <snovitoll@...il.com>
Cc: akpm@...ux-foundation.org, andreyknvl@...il.com, bpf@...r.kernel.org,
	dvyukov@...gle.com, glider@...gle.com, kasan-dev@...glegroups.com,
	linux-kernel@...r.kernel.org, linux-mm@...ck.org,
	ryabinin.a.a@...il.com,
	syzbot+61123a5daeb9f7454599@...kaller.appspotmail.com,
	vincenzo.frascino@....com
Subject: Re: [PATCH v2 1/1] mm, kasan, kmsan: copy_from/to_kernel_nofault

On Sat, Oct 05, 2024 at 09:48PM +0500, Sabyrzhan Tasbolatov wrote:
> Instrument copy_from_kernel_nofault() with KMSAN to check for copying of
> uninitialized kernel memory, and copy_to_kernel_nofault() with KASAN and
> KCSAN to detect memory corruption.
> 
> syzbot reported that the bpf_probe_read_kernel() helper triggered a KASAN
> report via kasan_check_range(), which is not the expected behaviour, as
> copy_from_kernel_nofault() is meant to be a non-faulting helper.
> 
> The solution, suggested by Marco Elver, is to replace the KASAN and KCSAN
> checks in copy_from_kernel_nofault() with KMSAN detection of copying
> uninitialized kernel memory. In copy_to_kernel_nofault() we can retain
> instrument_write() for the memory corruption instrumentation, but place it
> before pagefault_disable().

I don't understand why it has to be before the whole copy, i.e. before
pagefault_disable()?

I think my suggestion was to only check the memory where no fault
occurred. See below.
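
To make the control flow concrete: __get_kernel_nofault() branches to
err_label via the exception table when the access faults, so a check
placed right after it only runs for chunks that were copied
successfully. A simplified sketch of the resulting loop (u64 case only;
the real code also has the u32/u16/u8 loops and the allowed-range
check):

	long copy_from_kernel_nofault(void *dst, const void *src, size_t size)
	{
		pagefault_disable();
		while (size >= sizeof(u64)) {
			/* On a fault, this jumps to Efault. */
			__get_kernel_nofault(dst, src, u64, Efault);
			/* Reached only if the chunk did not fault. */
			kmsan_check_memory(src, sizeof(u64));
			dst += sizeof(u64);
			src += sizeof(u64);
			size -= sizeof(u64);
		}
		pagefault_enable();
		return 0;
	Efault:
		pagefault_enable();
		return -EFAULT;
	}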

> diff --git a/mm/maccess.c b/mm/maccess.c
> index 518a25667323..a91a39a56cfd 100644
> --- a/mm/maccess.c
> +++ b/mm/maccess.c
> @@ -15,7 +15,7 @@ bool __weak copy_from_kernel_nofault_allowed(const void *unsafe_src,
>  
>  #define copy_from_kernel_nofault_loop(dst, src, len, type, err_label)	\
>  	while (len >= sizeof(type)) {					\
> -		__get_kernel_nofault(dst, src, type, err_label);		\
> +		__get_kernel_nofault(dst, src, type, err_label);	\
>  		dst += sizeof(type);					\
>  		src += sizeof(type);					\
>  		len -= sizeof(type);					\
> @@ -31,6 +31,8 @@ long copy_from_kernel_nofault(void *dst, const void *src, size_t size)
>  	if (!copy_from_kernel_nofault_allowed(src, size))
>  		return -ERANGE;
>  
> +	/* Make sure uninitialized kernel memory isn't copied. */
> +	kmsan_check_memory(src, size);
>  	pagefault_disable();
>  	if (!(align & 7))
>  		copy_from_kernel_nofault_loop(dst, src, size, u64, Efault);
> @@ -49,7 +51,7 @@ EXPORT_SYMBOL_GPL(copy_from_kernel_nofault);
>  
>  #define copy_to_kernel_nofault_loop(dst, src, len, type, err_label)	\
>  	while (len >= sizeof(type)) {					\
> -		__put_kernel_nofault(dst, src, type, err_label);		\
> +		__put_kernel_nofault(dst, src, type, err_label);	\
>  		dst += sizeof(type);					\
>  		src += sizeof(type);					\
>  		len -= sizeof(type);					\
> @@ -62,6 +64,7 @@ long copy_to_kernel_nofault(void *dst, const void *src, size_t size)
>  	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
>  		align = (unsigned long)dst | (unsigned long)src;
>  
> +	instrument_write(dst, size);
>  	pagefault_disable();

So this will check the whole range before the copy. But if the copy
aborts partway because of a fault, we may still end up with false
positives.

Why not something like the below? Normally we check the accesses before
the copy, but these are debug kernels anyway, so I see no harm in making
an exception here and checking the memory only where no fault occurred,
i.e. where the copy did not jump to err_label. It's also slower because
of the repeated calls, but these helpers aren't frequently used.

The alternative is to do the sanitizer check after the entire copy, if
we know there was no fault at all. But that may still hide real bugs:
if, say, the copy succeeds for some initial chunks and then hits a
faulting page, the partially copied memory would never be checked.
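
Roughly, that alternative would be the following fragment (a
hypothetical sketch, not a proposal):

	const void *orig_src = src;
	size_t orig_size = size;

	pagefault_disable();
	copy_from_kernel_nofault_loop(dst, src, size, u64, Efault);
	pagefault_enable();
	/*
	 * Reached only on a fully successful copy; if any chunk faulted,
	 * the partially copied memory is never checked.
	 */
	kmsan_check_memory(orig_src, orig_size);
	return 0;
Efault:
	pagefault_enable();
	return -EFAULT;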


diff --git a/mm/maccess.c b/mm/maccess.c
index a91a39a56cfd..3ca55ec63a6a 100644
--- a/mm/maccess.c
+++ b/mm/maccess.c
@@ -13,9 +13,14 @@ bool __weak copy_from_kernel_nofault_allowed(const void *unsafe_src,
 	return true;
 }
 
+/*
+ * The below only uses kmsan_check_memory() to ensure uninitialized kernel
+ * memory isn't leaked.
+ */
 #define copy_from_kernel_nofault_loop(dst, src, len, type, err_label)	\
 	while (len >= sizeof(type)) {					\
 		__get_kernel_nofault(dst, src, type, err_label);	\
+		kmsan_check_memory(src, sizeof(type));			\
 		dst += sizeof(type);					\
 		src += sizeof(type);					\
 		len -= sizeof(type);					\
@@ -31,8 +36,6 @@ long copy_from_kernel_nofault(void *dst, const void *src, size_t size)
 	if (!copy_from_kernel_nofault_allowed(src, size))
 		return -ERANGE;
 
-	/* Make sure uninitialized kernel memory isn't copied. */
-	kmsan_check_memory(src, size);
 	pagefault_disable();
 	if (!(align & 7))
 		copy_from_kernel_nofault_loop(dst, src, size, u64, Efault);
@@ -52,6 +55,7 @@ EXPORT_SYMBOL_GPL(copy_from_kernel_nofault);
 #define copy_to_kernel_nofault_loop(dst, src, len, type, err_label)	\
 	while (len >= sizeof(type)) {					\
 		__put_kernel_nofault(dst, src, type, err_label);	\
+		instrument_write(dst, sizeof(type));			\
 		dst += sizeof(type);					\
 		src += sizeof(type);					\
 		len -= sizeof(type);					\
@@ -64,7 +68,6 @@ long copy_to_kernel_nofault(void *dst, const void *src, size_t size)
 	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
 		align = (unsigned long)dst | (unsigned long)src;
 
-	instrument_write(dst, size);
 	pagefault_disable();
 	if (!(align & 7))
 		copy_to_kernel_nofault_loop(dst, src, size, u64, Efault);
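
For reference, this is roughly how a typical caller consumes the helper
(simplified from bpf_probe_read_kernel_common() in
kernel/trace/bpf_trace.c): a negative return means the probe failed,
and the destination buffer is zeroed rather than left partially
written.

	static __always_inline int
	bpf_probe_read_kernel_common(void *dst, u32 size, const void *unsafe_ptr)
	{
		int ret;

		ret = copy_from_kernel_nofault(dst, unsafe_ptr, size);
		if (unlikely(ret < 0))
			memset(dst, 0, size);
		return ret;
	}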
