[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <006101d21565$b60a8a70$221f9f50$@alibaba-inc.com>
Date:   Fri, 23 Sep 2016 14:42:53 +0800
From:   "Hillf Danton" <hillf.zj@...baba-inc.com>
To:     "'Vlastimil Babka'" <vbabka@...e.cz>,
        "'Alexander Viro'" <viro@...iv.linux.org.uk>
Cc:     <linux-fsdevel@...r.kernel.org>, <linux-kernel@...r.kernel.org>,
        <linux-mm@...ck.org>, "'Michal Hocko'" <mhocko@...nel.org>,
        <netdev@...r.kernel.org>
Subject: Re: [PATCH] fs/select: add vmalloc fallback for select(2)
> 
> The select(2) syscall performs a kmalloc(size, GFP_KERNEL) where size grows
> with the number of fds passed. We had a customer report page allocation
> failures of order-4 for this allocation. This is a costly order, so it might
> easily fail, as the VM expects such allocations to have a lower-order fallback.
> 
> Such a trivial fallback is vmalloc(), as the memory doesn't have to be
> physically contiguous. Also the allocation is temporary for the duration of the
> syscall, so it's unlikely to stress vmalloc too much.
> 
> Note that the poll(2) syscall seems to use a linked list of order-0 pages, so
> it doesn't need this kind of fallback.
> 
> Signed-off-by: Vlastimil Babka <vbabka@...e.cz>
> ---
>  fs/select.c | 15 +++++++++++----
>  1 file changed, 11 insertions(+), 4 deletions(-)
> 
> diff --git a/fs/select.c b/fs/select.c
> index 8ed9da50896a..8fe5bddbe99b 100644
> --- a/fs/select.c
> +++ b/fs/select.c
> @@ -29,6 +29,7 @@
>  #include <linux/sched/rt.h>
>  #include <linux/freezer.h>
>  #include <net/busy_poll.h>
> +#include <linux/vmalloc.h>
> 
>  #include <asm/uaccess.h>
> 
> @@ -558,6 +559,7 @@ int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
>  	struct fdtable *fdt;
>  	/* Allocate small arguments on the stack to save memory and be faster */
>  	long stack_fds[SELECT_STACK_ALLOC/sizeof(long)];
> +	unsigned long alloc_size;
> 
>  	ret = -EINVAL;
>  	if (n < 0)
> @@ -580,10 +582,15 @@ int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
>  	bits = stack_fds;
>  	if (size > sizeof(stack_fds) / 6) {
>  		/* Not enough space in on-stack array; must use kmalloc */
> +		alloc_size = 6 * size;
>  		ret = -ENOMEM;
> -		bits = kmalloc(6 * size, GFP_KERNEL);
> -		if (!bits)
> -			goto out_nofds;
> +		bits = kmalloc(alloc_size, GFP_KERNEL|__GFP_NOWARN);
> +		if (!bits && alloc_size > PAGE_SIZE) {
> +			bits = vmalloc(alloc_size);
> +
> +			if (!bits)
> +				goto out_nofds;
> +		}
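Looks like we also have to bail out if kmalloc fails with
alloc_size not greater than PAGE_SIZE: in that case the vmalloc
branch is skipped, so the goto out_nofds inside it is never reached
and we carry on with bits == NULL.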
thanks
Hillf
>  	}
>  	fds.in      = bits;
>  	fds.out     = bits +   size;
> @@ -618,7 +625,7 @@ int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
> 
>  out:
>  	if (bits != stack_fds)
> -		kfree(bits);
> +		kvfree(bits);
>  out_nofds:
>  	return ret;
>  }
> --
> 2.10.0
Powered by blists - more mailing lists
 
