Date:	Fri, 15 Apr 2016 15:00:26 -0700
From:	Andrew Morton <akpm@...ux-foundation.org>
To:	Thomas Garnier <thgarnie@...gle.com>
Cc:	Christoph Lameter <cl@...ux.com>,
	Pekka Enberg <penberg@...nel.org>,
	David Rientjes <rientjes@...gle.com>,
	Joonsoo Kim <iamjoonsoo.kim@....com>,
	Kees Cook <keescook@...omium.org>, gthelen@...gle.com,
	labbott@...oraproject.org, kernel-hardening@...ts.openwall.com,
	linux-kernel@...r.kernel.org, linux-mm@...ck.org,
	Kees Cook <keescook@...omium.org>
Subject: Re: [PATCH] mm: SLAB freelist randomization

On Fri, 15 Apr 2016 10:25:59 -0700 Thomas Garnier <thgarnie@...gle.com> wrote:

> Provide an optional config (CONFIG_FREELIST_RANDOM) to randomize the
> SLAB freelist. The list is randomized during initialization of a new set
> of pages. The order for each freelist size is pre-computed at boot for
> performance. This security feature reduces the predictability of the
> kernel SLAB allocator against heap overflows, rendering attacks much
> less stable.
> 
> For example this attack against SLUB (also applicable against SLAB)
> would be affected:
> https://jon.oberheide.org/blog/2010/09/10/linux-kernel-can-slub-overflow/
> 
> Also, since v4.6 the freelist has been moved to the end of the SLAB.
> This means a controllable heap is opened to new attacks not yet publicly
> discussed: a kernel heap overflow can be transformed into multiple
> use-after-frees. This feature makes this type of attack harder too.
> 
> The config option name is not specific to SLAB, as this approach will
> be extended to other allocators such as SLUB.
> 
> Performance results highlighted no major changes:
>
> ...
>
> --- a/mm/slab.c
> +++ b/mm/slab.c
> @@ -1229,6 +1229,61 @@ static void __init set_up_node(struct kmem_cache *cachep, int index)
>  	}
>  }
>  
> +#ifdef CONFIG_FREELIST_RANDOM
> +/*
> + * Master lists are pre-computed random lists
> + * Lists of different sizes are used to optimize performance on different
> + * SLAB object sizes per pages.

"object sizes per pages" doesn't make sense.  "object-per-page counts"?
"object sizes"?

> + */
> +static freelist_idx_t master_list_2[2];
> +static freelist_idx_t master_list_4[4];
> +static freelist_idx_t master_list_8[8];
> +static freelist_idx_t master_list_16[16];
> +static freelist_idx_t master_list_32[32];
> +static freelist_idx_t master_list_64[64];
> +static freelist_idx_t master_list_128[128];
> +static freelist_idx_t master_list_256[256];
> +static struct m_list {
> +	size_t count;
> +	freelist_idx_t *list;
> +} master_lists[] = {
> +	{ ARRAY_SIZE(master_list_2), master_list_2 },
> +	{ ARRAY_SIZE(master_list_4), master_list_4 },
> +	{ ARRAY_SIZE(master_list_8), master_list_8 },
> +	{ ARRAY_SIZE(master_list_16), master_list_16 },
> +	{ ARRAY_SIZE(master_list_32), master_list_32 },
> +	{ ARRAY_SIZE(master_list_64), master_list_64 },
> +	{ ARRAY_SIZE(master_list_128), master_list_128 },
> +	{ ARRAY_SIZE(master_list_256), master_list_256 },
> +};
> +
> +static void __init freelist_random_init(void)
> +{
> +	unsigned int seed;
> +	size_t z, i, rand;
> +	struct rnd_state slab_rand;
> +
> +	get_random_bytes_arch(&seed, sizeof(seed));

Using get_random_bytes_arch() seems a rather poor decision.  There are
the caveats described at the get_random_bytes_arch() definition site,
and the minor issue that get_random_bytes_arch() only actually works on
x86 and powerpc!

This is run-once __init code, so rather than adding the kernel's very
first get_random_bytes_arch() call site(!), why not stick with good old
get_random_bytes()?

If there's something I'm missing, please at least place a very good
comment here explaining the reasoning.
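
ie, something like this (untested):

	unsigned int seed;

	get_random_bytes(&seed, sizeof(seed));
	prandom_seed_state(&slab_rand, seed);

This is run-once, boot-time code, so the extra cost of
get_random_bytes() doesn't matter, and it works on all architectures.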

> +	prandom_seed_state(&slab_rand, seed);
> +
> +	for (z = 0; z < ARRAY_SIZE(master_lists); z++) {
> +		for (i = 0; i < master_lists[z].count; i++)
> +			master_lists[z].list[i] = i;
> +
> +		/* Fisher-Yates shuffle */
> +		for (i = master_lists[z].count - 1; i > 0; i--) {
> +			rand = prandom_u32_state(&slab_rand);
> +			rand %= (i + 1);
> +			swap(master_lists[z].list[i],
> +				master_lists[z].list[rand]);
> +		}
> +	}
> +}
> +#else
> +static inline void __init freelist_random_init(void) { }
> +#endif /* CONFIG_FREELIST_RANDOM */
> +
> +
>  /*
>   * Initialisation.  Called after the page allocator have been initialised and
>   * before smp_init().
>
> ...
>
> @@ -2442,6 +2499,101 @@ static void cache_init_objs_debug(struct kmem_cache *cachep, struct page *page)
>  #endif
>  }
>  
> +#ifdef CONFIG_FREELIST_RANDOM
> +enum master_type {
> +	match,
> +	less,
> +	more
> +};
> +
> +struct random_mng {

I can't work out what "mng" means in this code.

> +	unsigned int padding;
> +	unsigned int pos;
> +	unsigned int count;
> +	struct m_list master_list;
> +	unsigned int master_count;
> +	enum master_type type;
> +};

It would be useful to document the above struct.  Skilfully documenting
the data structures is key to making the code understandable.
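
As a rough sketch of the sort of thing I mean (the field descriptions
below are my guesses - please correct them):

	/*
	 * struct random_mng - state used while generating one randomized
	 * freelist from a pre-computed master list.
	 * @padding: offset added to master list entries when the master
	 *	list is shorter than the freelist being generated.
	 * @pos: current position within the master list.
	 * @count: number of objects in the freelist being generated.
	 * @master_list: the pre-computed master list in use.
	 * @master_count: ?  (I can't tell what this one is for, which
	 *	rather proves the point)
	 * @type: whether the master list's size matches, exceeds or falls
	 *	short of @count.
	 */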

> +static void random_mng_initialize(struct random_mng *mng, unsigned int count)
> +{
> +	unsigned int idx;
> +	const unsigned int last_idx = ARRAY_SIZE(master_lists) - 1;
> +
> +	memset(mng, 0, sizeof(*mng));
> +	mng->count = count;
> +	mng->pos = 0;
> +	/* count is >= 2 */
> +	idx = ilog2(count) - 1;

slab.c should now include log2.h.
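
ie:

	#include <linux/log2.h>

for ilog2() and roundup_pow_of_two(), rather than relying on it being
pulled in indirectly.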

> +	if (idx >= last_idx)
> +		idx = last_idx;
> +	else if (roundup_pow_of_two(idx + 1) != count)
> +		idx++;
> +	mng->master_list = master_lists[idx];
> +	if (mng->master_list.count == mng->count)
> +		mng->type = match;
> +	else if (mng->master_list.count > mng->count)
> +		mng->type = more;
> +	else
> +		mng->type = less;
> +}
> +
> +static freelist_idx_t get_next_entry(struct random_mng *mng)
> +{
> +	if (mng->type == less && mng->pos == mng->master_list.count) {
> +		mng->padding += mng->pos;
> +		mng->pos = 0;
> +	}
> +	BUG_ON(mng->pos >= mng->master_list.count);
> +	return mng->master_list.list[mng->pos++];
> +}
> +
> +static freelist_idx_t next_random_slot(struct random_mng *mng)
> +{
> +	freelist_idx_t cur, entry;
> +
> +	entry = get_next_entry(mng);
> +
> +	if (mng->type != match) {
> +		while ((entry + mng->padding) >= mng->count)
> +			entry = get_next_entry(mng);
> +		cur = entry + mng->padding;
> +		BUG_ON(cur >= mng->count);
> +	} else {
> +		cur = entry;
> +	}
> +
> +	return cur;
> +}
> +
> +static void shuffle_freelist(struct kmem_cache *cachep, struct page *page,
> +			     unsigned int count)
> +{
> +	unsigned int i;
> +	struct random_mng mng;
> +
> +	if (count < 2) {
> +		for (i = 0; i < count; i++)
> +			set_free_obj(page, i, i);
> +		return;
> +	}
> +
> +	/* Last chunk is used already in this case */
> +	if (OBJFREELIST_SLAB(cachep))
> +		count--;
> +
> +	random_mng_initialize(&mng, count);
> +	for (i = 0; i < count; i++)
> +		set_free_obj(page, i, next_random_slot(&mng));
> +
> +	if (OBJFREELIST_SLAB(cachep))
> +		set_free_obj(page, i, i);
> +}

Sorry, but the code is really too light on comments.  Each of the above
functions would benefit from a description of what they do and, most
importantly, why they do it.
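
For example, something along these lines above shuffle_freelist() (my
wording - please fix it up):

	/*
	 * Shuffle the freelist for a new page using one of the pre-computed
	 * master lists, so that the order in which objects are allocated
	 * from the page cannot be predicted.
	 */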

Please address these things and let's wait for the slab maintainers
(and perhaps Kees?) to weigh in.

> +#else
> +static inline void shuffle_freelist(struct kmem_cache *cachep,
> +				    struct page *page, unsigned int count) { }
> +#endif /* CONFIG_FREELIST_RANDOM */
> +
>  static void cache_init_objs(struct kmem_cache *cachep,
>  			    struct page *page)
>  {
> @@ -2464,8 +2616,12 @@ static void cache_init_objs(struct kmem_cache *cachep,
>  			kasan_poison_object_data(cachep, objp);
>  		}
>  
> -		set_free_obj(page, i, i);
> +		/* If enabled, initialization is done in shuffle_freelist */
> +		if (!config_enabled(CONFIG_FREELIST_RANDOM))
> +			set_free_obj(page, i, i);
>  	}
> +
> +	shuffle_freelist(cachep, page, cachep->num);
>  }
>  
>  static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags)
