[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <20171107030355.GB6095@tardis>
Date: Tue, 7 Nov 2017 11:03:55 +0800
From: Boqun Feng <boqun.feng@...il.com>
To: Mathieu Desnoyers <mathieu.desnoyers@...icios.com>
Cc: Peter Zijlstra <peterz@...radead.org>,
"Paul E. McKenney" <paulmck@...ux.vnet.ibm.com>,
Andy Lutomirski <luto@...capital.net>,
Dave Watson <davejwatson@...com>,
linux-kernel <linux-kernel@...r.kernel.org>,
linux-api <linux-api@...r.kernel.org>,
Paul Turner <pjt@...gle.com>,
Andrew Morton <akpm@...ux-foundation.org>,
Russell King <linux@....linux.org.uk>,
Thomas Gleixner <tglx@...utronix.de>,
Ingo Molnar <mingo@...hat.com>,
"H. Peter Anvin" <hpa@...or.com>, Andrew Hunter <ahh@...gle.com>,
Andi Kleen <andi@...stfloor.org>, Chris Lameter <cl@...ux.com>,
Ben Maurer <bmaurer@...com>, rostedt <rostedt@...dmis.org>,
Josh Triplett <josh@...htriplett.org>,
Linus Torvalds <torvalds@...ux-foundation.org>,
Catalin Marinas <catalin.marinas@....com>,
Will Deacon <will.deacon@....com>,
Michael Kerrisk <mtk.manpages@...il.com>
Subject: Re: [RFC PATCH v2 for 4.15 08/14] Provide cpu_opv system call
On Tue, Nov 07, 2017 at 02:40:37AM +0000, Mathieu Desnoyers wrote:
> ----- On Nov 6, 2017, at 9:07 PM, Boqun Feng boqun.feng@...il.com wrote:
>
> > On Mon, Nov 06, 2017 at 03:56:38PM -0500, Mathieu Desnoyers wrote:
> > [...]
> >> +static int cpu_op_pin_pages(unsigned long addr, unsigned long len,
> >> + struct page ***pinned_pages_ptr, size_t *nr_pinned,
> >> + int write)
> >> +{
> >> + struct page *pages[2];
> >> + int ret, nr_pages;
> >> +
> >> + if (!len)
> >> + return 0;
> >> + nr_pages = cpu_op_range_nr_pages(addr, len);
> >> + BUG_ON(nr_pages > 2);
> >> + if (*nr_pinned + nr_pages > NR_PINNED_PAGES_ON_STACK) {
> >
> > Is this a bug? It seems you will kzalloc() every time if *nr_pinned is
> > bigger than NR_PINNED_PAGES_ON_STACK, which will result in a memory
> > leak.
> >
> > I think the logic here is complex enough for us to introduce a
> > structure, like:
> >
> > struct cpu_opv_page_pinner {
> > int nr_pinned;
> > bool is_kmalloc;
> > struct page **pinned_pages;
> > };
> >
> > Thoughts?
>
> Good catch !
>
> How about the attached diff? I'll fold it into the rseq/dev tree.
>
Looks good to me ;-)
Regards,
Boqun
> Thanks,
>
> Mathieu
>
> --
> Mathieu Desnoyers
> EfficiOS Inc.
> http://www.efficios.com
> diff --git a/kernel/cpu_opv.c b/kernel/cpu_opv.c
> index 09754bbe6a4f..3d8fd66416a0 100644
> --- a/kernel/cpu_opv.c
> +++ b/kernel/cpu_opv.c
> @@ -46,6 +46,12 @@ union op_fn_data {
> #endif
> };
>
> +struct cpu_opv_pinned_pages {
> + struct page **pages;
> + size_t nr;
> + bool is_kmalloc;
> +};
> +
> typedef int (*op_fn_t)(union op_fn_data *data, uint64_t v, uint32_t len);
>
> static DEFINE_MUTEX(cpu_opv_offline_lock);
> @@ -217,8 +223,7 @@ static int cpu_op_check_pages(struct page **pages,
> }
>
> static int cpu_op_pin_pages(unsigned long addr, unsigned long len,
> - struct page ***pinned_pages_ptr, size_t *nr_pinned,
> - int write)
> + struct cpu_opv_pinned_pages *pin_pages, int write)
> {
> struct page *pages[2];
> int ret, nr_pages;
> @@ -227,15 +232,17 @@ static int cpu_op_pin_pages(unsigned long addr, unsigned long len,
> return 0;
> nr_pages = cpu_op_range_nr_pages(addr, len);
> BUG_ON(nr_pages > 2);
> - if (*nr_pinned + nr_pages > NR_PINNED_PAGES_ON_STACK) {
> + if (!pin_pages->is_kmalloc && pin_pages->nr + nr_pages
> + > NR_PINNED_PAGES_ON_STACK) {
> struct page **pinned_pages =
> kzalloc(CPU_OP_VEC_LEN_MAX * CPU_OP_MAX_PAGES
> * sizeof(struct page *), GFP_KERNEL);
> if (!pinned_pages)
> return -ENOMEM;
> - memcpy(pinned_pages, *pinned_pages_ptr,
> - *nr_pinned * sizeof(struct page *));
> - *pinned_pages_ptr = pinned_pages;
> + memcpy(pinned_pages, pin_pages->pages,
> + pin_pages->nr * sizeof(struct page *));
> + pin_pages->pages = pinned_pages;
> + pin_pages->is_kmalloc = true;
> }
> again:
> ret = get_user_pages_fast(addr, nr_pages, write, pages);
> @@ -257,9 +264,9 @@ static int cpu_op_pin_pages(unsigned long addr, unsigned long len,
> }
> if (ret)
> goto error;
> - (*pinned_pages_ptr)[(*nr_pinned)++] = pages[0];
> + pin_pages->pages[pin_pages->nr++] = pages[0];
> if (nr_pages > 1)
> - (*pinned_pages_ptr)[(*nr_pinned)++] = pages[1];
> + pin_pages->pages[pin_pages->nr++] = pages[1];
> return 0;
>
> error:
> @@ -270,7 +277,7 @@ static int cpu_op_pin_pages(unsigned long addr, unsigned long len,
> }
>
> static int cpu_opv_pin_pages(struct cpu_op *cpuop, int cpuopcnt,
> - struct page ***pinned_pages_ptr, size_t *nr_pinned)
> + struct cpu_opv_pinned_pages *pin_pages)
> {
> int ret, i;
> bool expect_fault = false;
> @@ -289,7 +296,7 @@ static int cpu_opv_pin_pages(struct cpu_op *cpuop, int cpuopcnt,
> goto error;
> ret = cpu_op_pin_pages(
> (unsigned long)op->u.compare_op.a,
> - op->len, pinned_pages_ptr, nr_pinned, 0);
> + op->len, pin_pages, 0);
> if (ret)
> goto error;
> ret = -EFAULT;
> @@ -299,7 +306,7 @@ static int cpu_opv_pin_pages(struct cpu_op *cpuop, int cpuopcnt,
> goto error;
> ret = cpu_op_pin_pages(
> (unsigned long)op->u.compare_op.b,
> - op->len, pinned_pages_ptr, nr_pinned, 0);
> + op->len, pin_pages, 0);
> if (ret)
> goto error;
> break;
> @@ -311,7 +318,7 @@ static int cpu_opv_pin_pages(struct cpu_op *cpuop, int cpuopcnt,
> goto error;
> ret = cpu_op_pin_pages(
> (unsigned long)op->u.memcpy_op.dst,
> - op->len, pinned_pages_ptr, nr_pinned, 1);
> + op->len, pin_pages, 1);
> if (ret)
> goto error;
> ret = -EFAULT;
> @@ -321,7 +328,7 @@ static int cpu_opv_pin_pages(struct cpu_op *cpuop, int cpuopcnt,
> goto error;
> ret = cpu_op_pin_pages(
> (unsigned long)op->u.memcpy_op.src,
> - op->len, pinned_pages_ptr, nr_pinned, 0);
> + op->len, pin_pages, 0);
> if (ret)
> goto error;
> break;
> @@ -333,7 +340,7 @@ static int cpu_opv_pin_pages(struct cpu_op *cpuop, int cpuopcnt,
> goto error;
> ret = cpu_op_pin_pages(
> (unsigned long)op->u.arithmetic_op.p,
> - op->len, pinned_pages_ptr, nr_pinned, 1);
> + op->len, pin_pages, 1);
> if (ret)
> goto error;
> break;
> @@ -347,7 +354,7 @@ static int cpu_opv_pin_pages(struct cpu_op *cpuop, int cpuopcnt,
> goto error;
> ret = cpu_op_pin_pages(
> (unsigned long)op->u.bitwise_op.p,
> - op->len, pinned_pages_ptr, nr_pinned, 1);
> + op->len, pin_pages, 1);
> if (ret)
> goto error;
> break;
> @@ -360,7 +367,7 @@ static int cpu_opv_pin_pages(struct cpu_op *cpuop, int cpuopcnt,
> goto error;
> ret = cpu_op_pin_pages(
> (unsigned long)op->u.shift_op.p,
> - op->len, pinned_pages_ptr, nr_pinned, 1);
> + op->len, pin_pages, 1);
> if (ret)
> goto error;
> break;
> @@ -373,9 +380,9 @@ static int cpu_opv_pin_pages(struct cpu_op *cpuop, int cpuopcnt,
> return 0;
>
> error:
> - for (i = 0; i < *nr_pinned; i++)
> - put_page((*pinned_pages_ptr)[i]);
> - *nr_pinned = 0;
> + for (i = 0; i < pin_pages->nr; i++)
> + put_page(pin_pages->pages[i]);
> + pin_pages->nr = 0;
> /*
> * If faulting access is expected, return EAGAIN to user-space.
> * It allows user-space to distinguish between a fault caused by
> @@ -923,9 +930,12 @@ SYSCALL_DEFINE4(cpu_opv, struct cpu_op __user *, ucpuopv, int, cpuopcnt,
> {
> struct cpu_op cpuopv[CPU_OP_VEC_LEN_MAX];
> struct page *pinned_pages_on_stack[NR_PINNED_PAGES_ON_STACK];
> - struct page **pinned_pages = pinned_pages_on_stack;
> + struct cpu_opv_pinned_pages pin_pages = {
> + .pages = pinned_pages_on_stack,
> + .nr = 0,
> + .is_kmalloc = false,
> + };
> int ret, i;
> - size_t nr_pinned = 0;
>
> if (unlikely(flags))
> return -EINVAL;
> @@ -938,15 +948,14 @@ SYSCALL_DEFINE4(cpu_opv, struct cpu_op __user *, ucpuopv, int, cpuopcnt,
> ret = cpu_opv_check(cpuopv, cpuopcnt);
> if (ret)
> return ret;
> - ret = cpu_opv_pin_pages(cpuopv, cpuopcnt,
> - &pinned_pages, &nr_pinned);
> + ret = cpu_opv_pin_pages(cpuopv, cpuopcnt, &pin_pages);
> if (ret)
> goto end;
> ret = do_cpu_opv(cpuopv, cpuopcnt, cpu);
> - for (i = 0; i < nr_pinned; i++)
> - put_page(pinned_pages[i]);
> + for (i = 0; i < pin_pages.nr; i++)
> + put_page(pin_pages.pages[i]);
> end:
> - if (pinned_pages != pinned_pages_on_stack)
> - kfree(pinned_pages);
> + if (pin_pages.is_kmalloc)
> + kfree(pin_pages.pages);
> return ret;
> }
Download attachment "signature.asc" of type "application/pgp-signature" (489 bytes)
Powered by blists - more mailing lists