lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Thu, 3 May 2018 17:03:57 +1000
From:   "Tobin C. Harding" <tobin@...orbit.com>
To:     Song Liu <songliubraving@...com>
Cc:     netdev@...r.kernel.org, kernel-team@...com, qinteng@...com,
        Alexei Starovoitov <ast@...nel.org>,
        Daniel Borkmann <daniel@...earbox.net>,
        Peter Zijlstra <peterz@...radead.org>
Subject: Re: [PATCH v2 bpf-next 1/2] bpf: enable stackmap with build_id in
 nmi context

On Wed, May 02, 2018 at 04:20:29PM -0700, Song Liu wrote:
> Currently, we cannot parse build_id in nmi context because of
> up_read(&current->mm->mmap_sem), this makes stackmap with build_id
> less useful. This patch enables parsing build_id in nmi by putting
> the up_read() call in irq_work. To avoid memory allocation in nmi
> context, we use per cpu variable for the irq_work. As a result, only
> one irq_work per cpu is allowed. If the irq_work is in-use, we
> fallback to only report ips.
> 
> Cc: Alexei Starovoitov <ast@...nel.org>
> Cc: Daniel Borkmann <daniel@...earbox.net>
> Cc: Peter Zijlstra <peterz@...radead.org>
> Signed-off-by: Song Liu <songliubraving@...com>
> ---
>  init/Kconfig          |  1 +
>  kernel/bpf/stackmap.c | 59 +++++++++++++++++++++++++++++++++++++++++++++------
>  2 files changed, 54 insertions(+), 6 deletions(-)
> 
> diff --git a/init/Kconfig b/init/Kconfig
> index f013afc..480a4f2 100644
> --- a/init/Kconfig
> +++ b/init/Kconfig
> @@ -1391,6 +1391,7 @@ config BPF_SYSCALL
>  	bool "Enable bpf() system call"
>  	select ANON_INODES
>  	select BPF
> +	select IRQ_WORK
>  	default n
>  	help
>  	  Enable the bpf() system call that allows to manipulate eBPF
> diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
> index 3ba102b..51d4aea 100644
> --- a/kernel/bpf/stackmap.c
> +++ b/kernel/bpf/stackmap.c
> @@ -11,6 +11,7 @@
>  #include <linux/perf_event.h>
>  #include <linux/elf.h>
>  #include <linux/pagemap.h>
> +#include <linux/irq_work.h>
>  #include "percpu_freelist.h"
>  
>  #define STACK_CREATE_FLAG_MASK					\
> @@ -32,6 +33,23 @@ struct bpf_stack_map {
>  	struct stack_map_bucket *buckets[];
>  };
>  
> +/* irq_work to run up_read() for build_id lookup in nmi context */
> +struct stack_map_irq_work {
> +	struct irq_work irq_work;
> +	struct rw_semaphore *sem;
> +};
> +
> +static void do_up_read(struct irq_work *entry)
> +{
> +	struct stack_map_irq_work *work = container_of(entry,
> +			struct stack_map_irq_work, irq_work);

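Perhaps split the declaration from the assignment, so the container_of()
call fits on one line instead of being wrapped inside the initializer: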
	struct stack_map_irq_work *work;

	work = container_of(entry, struct stack_map_irq_work, irq_work);
> +	up_read(work->sem);
> +	work->sem = NULL;
> +}
> +
> +static DEFINE_PER_CPU(struct stack_map_irq_work, up_read_work);
> +
>  static inline bool stack_map_use_build_id(struct bpf_map *map)
>  {
>  	return (map->map_flags & BPF_F_STACK_BUILD_ID);
> @@ -267,17 +285,27 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
>  {
>  	int i;
>  	struct vm_area_struct *vma;
> +	bool in_nmi_ctx = in_nmi();
> +	bool irq_work_busy = false;
> +	struct stack_map_irq_work *work;
> +
> +	if (in_nmi_ctx) {
> +		work = this_cpu_ptr(&up_read_work);
> +		if (work->irq_work.flags & IRQ_WORK_BUSY)
> +			/* cannot queue more up_read, fallback */
> +			irq_work_busy = true;
> +	}
>  
>  	/*
> -	 * We cannot do up_read() in nmi context, so build_id lookup is
> -	 * only supported for non-nmi events. If at some point, it is
> -	 * possible to run find_vma() without taking the semaphore, we
> -	 * would like to allow build_id lookup in nmi context.
> +	 * We cannot do up_read() in nmi context. To do build_id lookup
> +	 * in nmi context, we need to run up_read() in irq_work. We use
> +	 * a percpu variable to do the irq_work. If the irq_work is
> +	 * already used by another lookup, we fall back to report ips.
>  	 *
>  	 * Same fallback is used for kernel stack (!user) on a stackmap
>  	 * with build_id.
>  	 */
> -	if (!user || !current || !current->mm || in_nmi() ||
> +	if (!user || !current || !current->mm || irq_work_busy ||
>  	    down_read_trylock(&current->mm->mmap_sem) == 0) {
>  		/* cannot access current->mm, fall back to ips */
>  		for (i = 0; i < trace_nr; i++) {
> @@ -299,7 +327,13 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
>  			- vma->vm_start;
>  		id_offs[i].status = BPF_STACK_BUILD_ID_VALID;
>  	}
> -	up_read(&current->mm->mmap_sem);
> +
> +	if (!in_nmi_ctx)
> +		up_read(&current->mm->mmap_sem);
> +	else {

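Perhaps use braces on both branches here; kernel coding style asks for
braces on every branch of a conditional when any one branch needs them: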
	if (!in_nmi_ctx) {
		up_read(&current->mm->mmap_sem);
	} else {


Hope this helps,
Tobin.

Powered by blists - more mailing lists