lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <202106021009.A556DFB7F2@keescook>
Date:   Wed, 2 Jun 2021 10:31:19 -0700
From:   Kees Cook <keescook@...omium.org>
To:     Jarmo Tiitto <jarmo.tiitto@...il.com>
Cc:     Sami Tolvanen <samitolvanen@...gle.com>,
        Bill Wendling <wcw@...gle.com>,
        Nathan Chancellor <nathan@...nel.org>,
        Nick Desaulniers <ndesaulniers@...gle.com>,
        clang-built-linux@...glegroups.com, linux-kernel@...r.kernel.org,
        morbo@...gle.com
Subject: Re: [PATCH 1/1] pgo: Fix sleep in atomic section in prf_open()

On Wed, Jun 02, 2021 at 07:26:40PM +0300, Jarmo Tiitto wrote:
> In prf_open() the required buffer size can be so large that
> vzalloc() may sleep, thus triggering this bug:
> 
> ======
>  BUG: sleeping function called from invalid context at include/linux/sched/mm.h:201
>  in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 337, name: cat
>  CPU: 1 PID: 337 Comm: cat Not tainted 5.13.0-rc2-24-hack+ #154
>  Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015
>  Call Trace:
>   dump_stack+0xc7/0x134
>   ___might_sleep+0x177/0x190
>   __might_sleep+0x5a/0x90
>   kmem_cache_alloc_node_trace+0x6b/0x3a0
>   ? __get_vm_area_node+0xcd/0x1b0
>   ? dput+0x283/0x300
>   __get_vm_area_node+0xcd/0x1b0
>   __vmalloc_node_range+0x7b/0x420
>   ? prf_open+0x1da/0x580
>   ? prf_open+0x32/0x580
>   ? __llvm_profile_instrument_memop+0x36/0x50
>   vzalloc+0x54/0x60
>   ? prf_open+0x1da/0x580
>   prf_open+0x1da/0x580
>   full_proxy_open+0x211/0x370
>   ....
> ======

Ah-ha; nice catch!

> 
> This patch avoids holding the lock taken by prf_lock() while
> calling vzalloc(). The problem with that is that prf_buffer_size()
> *must* be called with prf_lock() held, and the buffer
> size may change while we call vzalloc().
> 
> So first get the buffer size, release the lock and allocate.
> Then re-lock and call prf_serialize(), which now checks whether
> the buffer is big enough. If not, the code loops.
> 
> Signed-off-by: Jarmo Tiitto <jarmo.tiitto@...il.com>
> ---
>  kernel/pgo/fs.c | 45 +++++++++++++++++++++++++++++++++++----------
>  1 file changed, 35 insertions(+), 10 deletions(-)
> 
> diff --git a/kernel/pgo/fs.c b/kernel/pgo/fs.c
> index ef985159dad3..e8ac07637423 100644
> --- a/kernel/pgo/fs.c
> +++ b/kernel/pgo/fs.c
> @@ -227,16 +227,15 @@ static unsigned long prf_buffer_size(void)
>   * Serialize the profiling data into a format LLVM's tools can understand.
>   * Note: caller *must* hold pgo_lock.
>   */
> -static int prf_serialize(struct prf_private_data *p)
> +static int prf_serialize(struct prf_private_data *p, unsigned long *buf_size)
>  {
>  	int err = 0;
>  	void *buffer;
>  
> -	p->size = prf_buffer_size();
> -	p->buffer = vzalloc(p->size);
> +	*buf_size = prf_buffer_size();
>  
> -	if (!p->buffer) {
> -		err = -ENOMEM;
> +	if (p->size < *bufsize) {

Nit: please change prf_private_data::size to size_t while you're
touching this code.

> +		err = -EAGAIN;
>  		goto out;
>  	}
>  
> @@ -259,6 +258,7 @@ static int prf_open(struct inode *inode, struct file *file)
>  {
>  	struct prf_private_data *data;
>  	unsigned long flags;
> +	unsigned long buf_size;
>  	int err;
>  
>  	data = kzalloc(sizeof(*data), GFP_KERNEL);
> @@ -267,14 +267,39 @@ static int prf_open(struct inode *inode, struct file *file)
>  		goto out;
>  	}
>  
> +	/* note: vzalloc() cannot be used in an atomic section.
> +	 * However to get the buffer size prf_lock() *must*
> +	 * be taken. So take lock, get buffer size, release
> +	 * the lock and allocate.
> +	 * prf_serialize() then checks if buffer has enough space.
> +	 */
>  	flags = prf_lock();
> +	buf_size = prf_buffer_size();
>  
> -	err = prf_serialize(data);
> -	if (unlikely(err)) {
> -		kfree(data);
> -		goto out_unlock;
> -	}
> +	do {
> +		prf_unlock(flags);
> +
> +		/* resize buffer */
> +		if (data->size < buf_size && data->buffer) {
> +			vfree(data->buffer);
> +			data->buffer = NULL;
> +		}
> +
> +		if (!data->buffer) {
> +			data->size = buf_size;
> +			data->buffer = vzalloc(data->size);
> +
> +			if (!data->buffer) {
> +				err = -ENOMEM;
> +				kfree(data);
> +				goto out;
> +			}
> +		}
> +		/* try serialize */
> +		flags = prf_lock();
> +	} while (prf_serialize(data, &buf_size));

I'm not a fan of loops where it's hard to answer the question "how do we
know this loop will always terminate?"

Given that vmalloc allocates PAGE_SIZE-granular regions, how about
rounding the size up, which will likely avoid multiple passes, and putting
the growth check explicitly in the loop, rather than just looking at "any"
prf_serialize() failure?

e.g.:

	struct prf_private_data *data;
	int err = -ENOMEM;

	data = kzalloc(sizeof(*data), GFP_KERNEL);
	if (!data)
		goto out_free;

	do {
		unsigned long flags;
		size_t size;

		size = PAGE_ALIGN(prf_buffer_size());
		/* Required buffer size must be growing with each loop. */
		if (WARN_ON_ONCE(size <= data->size)) {
			err = -ENOMEM;
			goto out_free;
		}

		if (data->buf)
			vfree(data->buf);
		data->buf = vzalloc(size);
		if (!data->buf) {
			err = -ENOMEM;
			goto out_free;
		}
		data->size = size;

		flags = prf_lock();
		err = prf_serialize(data);
		prf_unlock(flags);
	} while (err == -EAGAIN);

	if (err)
		goto out_free;

	file->private_data = data;
	return 0;

out_free:
	if (data)
		vfree(data->buf);
	kfree(data);
	return err;


>  
> +	data->size = buf_size;
>  	file->private_data = data;
>  
>  out_unlock:
> 
> base-commit: e1af496cbe9b4517428601a4e44fee3602dd3c15
> prerequisite-patch-id: fccc1bd89bbd33af13a4ce9bc3c913e6e3cdecee
> prerequisite-patch-id: a2e53c0b44ad39c78ed7bc7aad40d133548a13b5
> prerequisite-patch-id: 12f0e468a3d0ff12c7f5bc640f213be3b5dd261b
> prerequisite-patch-id: 707b836b1969958b5131dfa1b9f044eae5f4a76a
> -- 
> 2.31.1
> 

-- 
Kees Cook

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ