[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <3728d93e-67e3-8018-1025-7bab444c1f9e@iogearbox.net>
Date: Wed, 14 Mar 2018 17:07:35 +0100
From: Daniel Borkmann <daniel@...earbox.net>
To: Song Liu <songliubraving@...com>, netdev@...r.kernel.org,
ast@...nel.org, peterz@...radead.org
Cc: kernel-team@...com, hannes@...xchg.org, qinteng@...com
Subject: Re: [PATCH bpf-next v5 1/2] bpf: extend stackmap to save
binary_build_id+offset instead of address
Just a minor question below, the rest seems fine to me as far as I
can tell.
On 03/13/2018 10:47 PM, Song Liu wrote:
[...]
> +enum bpf_stack_build_id_status {
> + /* user space need an empty entry to identify end of a trace */
> + BPF_STACK_BUILD_ID_EMPTY = 0,
> + /* with valid build_id and offset */
> + BPF_STACK_BUILD_ID_VALID = 1,
> + /* couldn't get build_id, fallback to ip */
> + BPF_STACK_BUILD_ID_IP = 2,
> +};
> +
> +#define BPF_BUILD_ID_SIZE 20
> +struct bpf_stack_build_id {
> + __s32 status;
> + unsigned char build_id[BPF_BUILD_ID_SIZE];
> + union {
> + __u64 offset;
> + __u64 ip;
> + };
> +};
[...]
> BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
> u64, flags)
> {
> struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map);
> struct perf_callchain_entry *trace;
> struct stack_map_bucket *bucket, *new_bucket, *old_bucket;
> - u32 max_depth = map->value_size / 8;
> + u32 max_depth = map->value_size / stack_map_data_size(map);
> /* stack_map_alloc() checks that max_depth <= sysctl_perf_event_max_stack */
> u32 init_nr = sysctl_perf_event_max_stack - max_depth;
> u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
> @@ -128,11 +318,16 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
> bool user = flags & BPF_F_USER_STACK;
> bool kernel = !user;
> u64 *ips;
> + bool hash_matches;
>
> if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK |
> BPF_F_FAST_STACK_CMP | BPF_F_REUSE_STACKID)))
> return -EINVAL;
>
> + /* build_id+offset stack map only supports user stack */
> + if (stack_map_use_build_id(map) && !user)
> + return -EINVAL;
Instead of bailing out here, wouldn't it make sense to just reuse the
BPF_STACK_BUILD_ID_IP status and use this 'fallback' for kernel stacks
as well, similar to what we already do in stack_map_get_build_id_offset()
when we cannot get the build id? That way the map could be used for
both cases.
> trace = get_perf_callchain(regs, init_nr, kernel, user,
> sysctl_perf_event_max_stack, false, false);
>
> @@ -156,24 +351,42 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
> id = hash & (smap->n_buckets - 1);
> bucket = READ_ONCE(smap->buckets[id]);
>
> - if (bucket && bucket->hash == hash) {
> - if (flags & BPF_F_FAST_STACK_CMP)
> + hash_matches = bucket && bucket->hash == hash;
> + /* fast cmp */
> + if (hash_matches && flags & BPF_F_FAST_STACK_CMP)
> + return id;
> +
> + if (stack_map_use_build_id(map)) {
> + /* for build_id+offset, pop a bucket before slow cmp */
> + new_bucket = (struct stack_map_bucket *)
> + pcpu_freelist_pop(&smap->freelist);
> + if (unlikely(!new_bucket))
> + return -ENOMEM;
> + stack_map_get_build_id_offset(map, new_bucket, ips, trace_nr);
> + trace_len = trace_nr * sizeof(struct bpf_stack_build_id);
> + if (hash_matches && bucket->nr == trace_nr &&
> + memcmp(bucket->data, new_bucket->data, trace_len) == 0) {
> + pcpu_freelist_push(&smap->freelist, &new_bucket->fnode);
> return id;
> - if (bucket->nr == trace_nr &&
> - memcmp(bucket->ip, ips, trace_len) == 0)
> + }
> + if (bucket && !(flags & BPF_F_REUSE_STACKID)) {
> + pcpu_freelist_push(&smap->freelist, &new_bucket->fnode);
> + return -EEXIST;
> + }
> + } else {
> + if (hash_matches && bucket->nr == trace_nr &&
> + memcmp(bucket->data, ips, trace_len) == 0)
> return id;
> + if (bucket && !(flags & BPF_F_REUSE_STACKID))
> + return -EEXIST;
> +
> + new_bucket = (struct stack_map_bucket *)
> + pcpu_freelist_pop(&smap->freelist);
> + if (unlikely(!new_bucket))
> + return -ENOMEM;
> + memcpy(new_bucket->data, ips, trace_len);
> }
>
> - /* this call stack is not in the map, try to add it */
> - if (bucket && !(flags & BPF_F_REUSE_STACKID))
> - return -EEXIST;
> -
> - new_bucket = (struct stack_map_bucket *)
> - pcpu_freelist_pop(&smap->freelist);
> - if (unlikely(!new_bucket))
> - return -ENOMEM;
> -
> - memcpy(new_bucket->ip, ips, trace_len);
> new_bucket->hash = hash;
> new_bucket->nr = trace_nr;
>
> @@ -212,8 +425,8 @@ int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
> if (!bucket)
> return -ENOENT;
>
> - trace_len = bucket->nr * sizeof(u64);
> - memcpy(value, bucket->ip, trace_len);
> + trace_len = bucket->nr * stack_map_data_size(map);
> + memcpy(value, bucket->data, trace_len);
> memset(value + trace_len, 0, map->value_size - trace_len);
>
> old_bucket = xchg(&smap->buckets[id], bucket);
>
Powered by blists - more mailing lists