netdev - Re: [PATCH bpf-next v5 1/2] bpf: extend stackmap to save binary_build_id+offset instead of address

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives

Hash Suite: Windows password security audit tool. GUI, reports in PDF.

[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]

Message-ID: <3728d93e-67e3-8018-1025-7bab444c1f9e@iogearbox.net>
Date:   Wed, 14 Mar 2018 17:07:35 +0100
From:   Daniel Borkmann <daniel@...earbox.net>
To:     Song Liu <songliubraving@...com>, netdev@...r.kernel.org,
        ast@...nel.org, peterz@...radead.org
Cc:     kernel-team@...com, hannes@...xchg.org, qinteng@...com
Subject: Re: [PATCH bpf-next v5 1/2] bpf: extend stackmap to save
 binary_build_id+offset instead of address

Just a minor question below, the rest seems fine to me as far as I
can tell.

On 03/13/2018 10:47 PM, Song Liu wrote:
[...]
> +enum bpf_stack_build_id_status {
> +	/* user space need an empty entry to identify end of a trace */
> +	BPF_STACK_BUILD_ID_EMPTY = 0,
> +	/* with valid build_id and offset */
> +	BPF_STACK_BUILD_ID_VALID = 1,
> +	/* couldn't get build_id, fallback to ip */
> +	BPF_STACK_BUILD_ID_IP = 2,
> +};
> +
> +#define BPF_BUILD_ID_SIZE 20
> +struct bpf_stack_build_id {
> +	__s32		status;
> +	unsigned char	build_id[BPF_BUILD_ID_SIZE];
> +	union {
> +		__u64	offset;
> +		__u64	ip;
> +	};
> +};
[...]
>  BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
>  	   u64, flags)
>  {
>  	struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map);
>  	struct perf_callchain_entry *trace;
>  	struct stack_map_bucket *bucket, *new_bucket, *old_bucket;
> -	u32 max_depth = map->value_size / 8;
> +	u32 max_depth = map->value_size / stack_map_data_size(map);
>  	/* stack_map_alloc() checks that max_depth <= sysctl_perf_event_max_stack */
>  	u32 init_nr = sysctl_perf_event_max_stack - max_depth;
>  	u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
> @@ -128,11 +318,16 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
>  	bool user = flags & BPF_F_USER_STACK;
>  	bool kernel = !user;
>  	u64 *ips;
> +	bool hash_matches;
>  
>  	if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK |
>  			       BPF_F_FAST_STACK_CMP | BPF_F_REUSE_STACKID)))
>  		return -EINVAL;
>  
> +	/* build_id+offset stack map only supports user stack */
> +	if (stack_map_use_build_id(map) && !user)
> +		return -EINVAL;

Instead of bailing out here, wouldn't it make sense to reuse the
BPF_STACK_BUILD_ID_IP status as a 'fallback' for kernel stacks, similar
to what we already do in stack_map_get_build_id_offset() when we cannot
get the build id? That way the map could be used for both cases.

>  	trace = get_perf_callchain(regs, init_nr, kernel, user,
>  				   sysctl_perf_event_max_stack, false, false);
>  
> @@ -156,24 +351,42 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
>  	id = hash & (smap->n_buckets - 1);
>  	bucket = READ_ONCE(smap->buckets[id]);
>  
> -	if (bucket && bucket->hash == hash) {
> -		if (flags & BPF_F_FAST_STACK_CMP)
> +	hash_matches = bucket && bucket->hash == hash;
> +	/* fast cmp */
> +	if (hash_matches && flags & BPF_F_FAST_STACK_CMP)
> +		return id;
> +
> +	if (stack_map_use_build_id(map)) {
> +		/* for build_id+offset, pop a bucket before slow cmp */
> +		new_bucket = (struct stack_map_bucket *)
> +			pcpu_freelist_pop(&smap->freelist);
> +		if (unlikely(!new_bucket))
> +			return -ENOMEM;
> +		stack_map_get_build_id_offset(map, new_bucket, ips, trace_nr);
> +		trace_len = trace_nr * sizeof(struct bpf_stack_build_id);
> +		if (hash_matches && bucket->nr == trace_nr &&
> +		    memcmp(bucket->data, new_bucket->data, trace_len) == 0) {
> +			pcpu_freelist_push(&smap->freelist, &new_bucket->fnode);
>  			return id;
> -		if (bucket->nr == trace_nr &&
> -		    memcmp(bucket->ip, ips, trace_len) == 0)
> +		}
> +		if (bucket && !(flags & BPF_F_REUSE_STACKID)) {
> +			pcpu_freelist_push(&smap->freelist, &new_bucket->fnode);
> +			return -EEXIST;
> +		}
> +	} else {
> +		if (hash_matches && bucket->nr == trace_nr &&
> +		    memcmp(bucket->data, ips, trace_len) == 0)
>  			return id;
> +		if (bucket && !(flags & BPF_F_REUSE_STACKID))
> +			return -EEXIST;
> +
> +		new_bucket = (struct stack_map_bucket *)
> +			pcpu_freelist_pop(&smap->freelist);
> +		if (unlikely(!new_bucket))
> +			return -ENOMEM;
> +		memcpy(new_bucket->data, ips, trace_len);
>  	}
>  
> -	/* this call stack is not in the map, try to add it */
> -	if (bucket && !(flags & BPF_F_REUSE_STACKID))
> -		return -EEXIST;
> -
> -	new_bucket = (struct stack_map_bucket *)
> -		pcpu_freelist_pop(&smap->freelist);
> -	if (unlikely(!new_bucket))
> -		return -ENOMEM;
> -
> -	memcpy(new_bucket->ip, ips, trace_len);
>  	new_bucket->hash = hash;
>  	new_bucket->nr = trace_nr;
>  
> @@ -212,8 +425,8 @@ int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
>  	if (!bucket)
>  		return -ENOENT;
>  
> -	trace_len = bucket->nr * sizeof(u64);
> -	memcpy(value, bucket->ip, trace_len);
> +	trace_len = bucket->nr * stack_map_data_size(map);
> +	memcpy(value, bucket->data, trace_len);
>  	memset(value + trace_len, 0, map->value_size - trace_len);
>  
>  	old_bucket = xchg(&smap->buckets[id], bucket);
>