Message-Id: <20150129151213.09d1f9e0a01490712d0eb071@linux-foundation.org>
Date:	Thu, 29 Jan 2015 15:12:13 -0800
From:	Andrew Morton <akpm@...ux-foundation.org>
To:	Andrey Ryabinin <a.ryabinin@...sung.com>
Cc:	linux-kernel@...r.kernel.org, Dmitry Vyukov <dvyukov@...gle.com>,
	Konstantin Serebryany <kcc@...gle.com>,
	Dmitry Chernenkov <dmitryc@...gle.com>,
	Andrey Konovalov <adech.fo@...il.com>,
	Yuri Gribov <tetra2005@...il.com>,
	Konstantin Khlebnikov <koct9i@...il.com>,
	Sasha Levin <sasha.levin@...cle.com>,
	Christoph Lameter <cl@...ux.com>,
	Joonsoo Kim <iamjoonsoo.kim@....com>,
	Dave Hansen <dave.hansen@...el.com>,
	Andi Kleen <andi@...stfloor.org>, x86@...nel.org,
	linux-mm@...ck.org, Jonathan Corbet <corbet@....net>,
	Michal Marek <mmarek@...e.cz>, Ingo Molnar <mingo@...hat.com>,
	Peter Zijlstra <peterz@...radead.org>,
	linux-doc@...r.kernel.org (open list:DOCUMENTATION),
	linux-kbuild@...r.kernel.org (open list:KERNEL BUILD + fi...)
Subject: Re: [PATCH v10 01/17] Add kernel address sanitizer infrastructure.

On Thu, 29 Jan 2015 18:11:45 +0300 Andrey Ryabinin <a.ryabinin@...sung.com> wrote:

> Kernel Address sanitizer (KASan) is a dynamic memory error detector. It provides
> a fast and comprehensive solution for finding use-after-free and out-of-bounds bugs.
> 
> KASAN uses compile-time instrumentation for checking every memory access,
> therefore GCC >= v4.9.2 is required.
> 
> ...
>
> Based on work by Andrey Konovalov <adech.fo@...il.com>

Can we obtain Andrey's signed-off-by: please?
 
> +void kasan_unpoison_shadow(const void *address, size_t size)
> +{
> +	kasan_poison_shadow(address, size, 0);
> +
> +	if (size & KASAN_SHADOW_MASK) {
> +		u8 *shadow = (u8 *)kasan_mem_to_shadow((unsigned long)address
> +						+ size);
> +		*shadow = size & KASAN_SHADOW_MASK;
> +	}
> +}

There's a lot of typecasting happening with kasan_mem_to_shadow().  In
this patch the return value gets typecast more often than not, and the
argument gets cast quite a lot as well.  I suspect the code would turn
out better if kasan_mem_to_shadow() were to take a (const?) void* arg
and were to return a void*.
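
Something like this, say (untested sketch - the body just inverts your
kasan_shadow_to_mem() conversion):

	static inline void *kasan_mem_to_shadow(const void *addr)
	{
		/* sketch: mirror of kasan_shadow_to_mem(), not compile-tested */
		return (void *)((unsigned long)addr >> KASAN_SHADOW_SCALE_SHIFT)
			+ KASAN_SHADOW_OFFSET;
	}

and then most of the u8*/s8* casts at the call sites simply go away.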

> +static __always_inline bool memory_is_poisoned_1(unsigned long addr)

What's with all the __always_inline in this file?  When I remove them
all, kasan.o .text falls from 8294 bytes down to 4543 bytes.  That's
massive, and quite possibly faster.

If there's some magical functional reason for this then can we please
get a nice prominent comment into this code apologetically explaining
it?

> +{
> +	s8 shadow_value = *(s8 *)kasan_mem_to_shadow(addr);
> +
> +	if (unlikely(shadow_value)) {
> +		s8 last_accessible_byte = addr & KASAN_SHADOW_MASK;
> +		return unlikely(last_accessible_byte >= shadow_value);
> +	}
> +
> +	return false;
> +}
> +
> 
> ...
>
> +
> +#define DECLARE_ASAN_CHECK(size)				\

DEFINE_ASAN_CHECK would be more accurate, because this macro expands
to definitions, not declarations.

> +	void __asan_load##size(unsigned long addr)		\
> +	{							\
> +		check_memory_region(addr, size, false);		\
> +	}							\
> +	EXPORT_SYMBOL(__asan_load##size);			\
> +	__attribute__((alias("__asan_load"#size)))		\
> +	void __asan_load##size##_noabort(unsigned long);	\
> +	EXPORT_SYMBOL(__asan_load##size##_noabort);		\
> +	void __asan_store##size(unsigned long addr)		\
> +	{							\
> +		check_memory_region(addr, size, true);		\
> +	}							\
> +	EXPORT_SYMBOL(__asan_store##size);			\
> +	__attribute__((alias("__asan_store"#size)))		\
> +	void __asan_store##size##_noabort(unsigned long);	\
> +	EXPORT_SYMBOL(__asan_store##size##_noabort)
> +
> +DECLARE_ASAN_CHECK(1);
> +DECLARE_ASAN_CHECK(2);
> +DECLARE_ASAN_CHECK(4);
> +DECLARE_ASAN_CHECK(8);
> +DECLARE_ASAN_CHECK(16);
> +
> +void __asan_loadN(unsigned long addr, size_t size)
> +{
> +	check_memory_region(addr, size, false);
> +}
> +EXPORT_SYMBOL(__asan_loadN);
> +
> +__attribute__((alias("__asan_loadN")))

Maybe we need a __alias helper, like __packed and the various other
attribute wrappers.
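
i.e. something along these lines, next to __packed and friends (sketch,
exact name and location to taste):

	/* expands to __attribute__((alias("__asan_loadN"))) etc */
	#define __alias(symbol)	__attribute__((alias(#symbol)))

so these declarations become

	__alias(__asan_loadN)
	void __asan_loadN_noabort(unsigned long, size_t);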

> +void __asan_loadN_noabort(unsigned long, size_t);
> +EXPORT_SYMBOL(__asan_loadN_noabort);
> +
> +void __asan_storeN(unsigned long addr, size_t size)
> +{
> +	check_memory_region(addr, size, true);
> +}
> +EXPORT_SYMBOL(__asan_storeN);
> +
> +__attribute__((alias("__asan_storeN")))
> +void __asan_storeN_noabort(unsigned long, size_t);
> +EXPORT_SYMBOL(__asan_storeN_noabort);
> +
> +/* to shut up compiler complaints */
> +void __asan_handle_no_return(void) {}
> +EXPORT_SYMBOL(__asan_handle_no_return);
> diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h
> new file mode 100644
> index 0000000..da0e53c
> --- /dev/null
> +++ b/mm/kasan/kasan.h
> @@ -0,0 +1,47 @@
> +#ifndef __MM_KASAN_KASAN_H
> +#define __MM_KASAN_KASAN_H
> +
> +#include <linux/kasan.h>
> +
> +#define KASAN_SHADOW_SCALE_SIZE (1UL << KASAN_SHADOW_SCALE_SHIFT)
> +#define KASAN_SHADOW_MASK       (KASAN_SHADOW_SCALE_SIZE - 1)
> +
> +struct access_info {

kasan_access_info would be a better name.

> +	unsigned long access_addr;
> +	unsigned long first_bad_addr;
> +	size_t access_size;
> +	bool is_write;
> +	unsigned long ip;
> +};
> +
> +void kasan_report_error(struct access_info *info);
> +void kasan_report_user_access(struct access_info *info);
> +
> +static inline unsigned long kasan_shadow_to_mem(unsigned long shadow_addr)
> +{
> +	return (shadow_addr - KASAN_SHADOW_OFFSET) << KASAN_SHADOW_SCALE_SHIFT;
> +}
> +
> +static inline bool kasan_enabled(void)
> +{
> +	return !current->kasan_depth;
> +}
> +
> +static __always_inline void kasan_report(unsigned long addr,
> +					size_t size,
> +					bool is_write)

Again, why the inline?  This is presumably not a hotpath and
kasan_report has sixish call sites.


> +{
> +	struct access_info info;
> +
> +	if (likely(!kasan_enabled()))
> +		return;
> +
> +	info.access_addr = addr;
> +	info.access_size = size;
> +	info.is_write = is_write;
> +	info.ip = _RET_IP_;
> +	kasan_report_error(&info);
> +}
> 
> ...
>
> +static void print_error_description(struct access_info *info)
> +{
> +	const char *bug_type = "unknown crash";
> +	u8 shadow_val;
> +
> +	info->first_bad_addr = find_first_bad_addr(info->access_addr,
> +						info->access_size);
> +
> +	shadow_val = *(u8 *)kasan_mem_to_shadow(info->first_bad_addr);
> +
> +	switch (shadow_val) {
> +	case 0 ... KASAN_SHADOW_SCALE_SIZE - 1:
> +		bug_type = "out of bounds access";
> +		break;
> +	}
> +
> +	pr_err("BUG: AddressSanitizer: %s in %pS at addr %p\n",

Sometimes it's called "kasan", sometimes "AddressSanitizer".  Wouldn't
it be better to use the same name everywhere?

> +		bug_type, (void *)info->ip,
> +		(void *)info->access_addr);
> +	pr_err("%s of size %zu by task %s/%d\n",
> +		info->is_write ? "Write" : "Read",
> +		info->access_size, current->comm, task_pid_nr(current));
> +}
> +
> +static void print_address_description(struct access_info *info)
> +{
> +	dump_stack();
> +}

dump_stack() uses KERN_INFO but the callers of
print_address_description() use KERN_ERR.  This means that at some
settings of `dmesg -n', the kasan output will have large missing
chunks.

Please test this and decide how bad it is.  A proper fix will be
somewhat messy (new_dump_stack(KERN_ERR)).
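
To illustrate (purely hypothetical - neither the name nor the signature
exists today, it's just the placeholder above):

	/*
	 * Hypothetical: a dump_stack() flavour which takes the log level,
	 * so this function could emit the backtrace at KERN_ERR.  Making
	 * it real means plumbing the level through __dump_stack() and
	 * show_stack(), which is the messy part.
	 */
	void new_dump_stack(const char *level);

	static void print_address_description(struct access_info *info)
	{
		new_dump_stack(KERN_ERR);
	}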

> +static bool row_is_guilty(unsigned long row, unsigned long guilty)
> +{
> +	return (row <= guilty) && (guilty < row + SHADOW_BYTES_PER_ROW);
> +}
> +
> +static int shadow_pointer_offset(unsigned long row, unsigned long shadow)
> +{
> +	/* The length of ">ff00ff00ff00ff00: " is
> +	 *    3 + (BITS_PER_LONG/8)*2 chars.
> +	 */
> +	return 3 + (BITS_PER_LONG/8)*2 + (shadow - row)*2 +
> +		(shadow - row) / SHADOW_BYTES_PER_BLOCK + 1;
> +}
> +
> +static void print_shadow_for_address(unsigned long addr)
> +{
> +	int i;
> +	unsigned long shadow = kasan_mem_to_shadow(addr);
> +	unsigned long aligned_shadow = round_down(shadow, SHADOW_BYTES_PER_ROW)
> +		- SHADOW_ROWS_AROUND_ADDR * SHADOW_BYTES_PER_ROW;

You don't *have* to initialize at the definition site.  You can do

	unsigned long aligned_shadow;
	...
	aligned_shadow = ...;

and the 80-col tricks often come out looking better.

> +	pr_err("Memory state around the buggy address:\n");
> +
> +	for (i = -SHADOW_ROWS_AROUND_ADDR; i <= SHADOW_ROWS_AROUND_ADDR; i++) {
> +		unsigned long kaddr = kasan_shadow_to_mem(aligned_shadow);
> +		char buffer[4 + (BITS_PER_LONG/8)*2];
> +
> +		snprintf(buffer, sizeof(buffer),
> +			(i == 0) ? ">%lx: " : " %lx: ", kaddr);
> +
> +		kasan_disable_local();
> +		print_hex_dump(KERN_ERR, buffer,
> +			DUMP_PREFIX_NONE, SHADOW_BYTES_PER_ROW, 1,
> +			(void *)aligned_shadow, SHADOW_BYTES_PER_ROW, 0);
> +		kasan_enable_local();
> +
> +		if (row_is_guilty(aligned_shadow, shadow))
> +			pr_err("%*c\n",
> +				shadow_pointer_offset(aligned_shadow, shadow),
> +				'^');
> +
> +		aligned_shadow += SHADOW_BYTES_PER_ROW;
> +	}
> +}
> 
> ...
>

