[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <f36d19e7-cc6f-730b-cf13-d77e1ce88d2f@fb.com>
Date: Fri, 11 Jun 2021 15:12:05 -0700
From: Yonghong Song <yhs@...com>
To: Alexei Starovoitov <alexei.starovoitov@...il.com>,
<davem@...emloft.net>
CC: <daniel@...earbox.net>, <andrii@...nel.org>,
<netdev@...r.kernel.org>, <bpf@...r.kernel.org>,
<kernel-team@...com>
Subject: Re: [PATCH v2 bpf-next 1/3] bpf: Introduce bpf_timer
On 6/10/21 9:24 PM, Alexei Starovoitov wrote:
> From: Alexei Starovoitov <ast@...nel.org>
>
> Introduce 'struct bpf_timer { __u64 :64; __u64 :64; };' that can be embedded
> in hash/array/lru maps as regular field and helpers to operate on it:
>
> // Initialize the timer to call 'callback_fn' static function
> // First 4 bits of 'flags' specify clockid.
> // Only CLOCK_MONOTONIC, CLOCK_REALTIME, CLOCK_BOOTTIME are allowed.
> long bpf_timer_init(struct bpf_timer *timer, void *callback_fn, int flags);
>
> // Start the timer and set its expiration 'nsec' nanoseconds from the current time.
> long bpf_timer_start(struct bpf_timer *timer, u64 nsec);
>
> // Cancel the timer and wait for callback_fn to finish if it was running.
> long bpf_timer_cancel(struct bpf_timer *timer);
>
> Here is how a BPF program might look:
> struct map_elem {
> int counter;
> struct bpf_timer timer;
> };
>
> struct {
> __uint(type, BPF_MAP_TYPE_HASH);
> __uint(max_entries, 1000);
> __type(key, int);
> __type(value, struct map_elem);
> } hmap SEC(".maps");
>
> static int timer_cb(void *map, int *key, struct map_elem *val);
> /* val points to particular map element that contains bpf_timer. */
>
> SEC("fentry/bpf_fentry_test1")
> int BPF_PROG(test1, int a)
> {
> struct map_elem *val;
> int key = 0;
>
> val = bpf_map_lookup_elem(&hmap, &key);
> if (val) {
> bpf_timer_init(&val->timer, timer_cb, CLOCK_REALTIME);
> bpf_timer_start(&val->timer, 1000 /* call timer_cb in 1 usec */);
> }
> }
>
> This patch adds helper implementations that rely on hrtimers
> to call bpf functions as timers expire.
> The following patch adds necessary safety checks.
>
> Only programs with CAP_BPF are allowed to use bpf_timer.
>
> The amount of timers used by the program is constrained by
> the memcg recorded at map creation time.
>
> The bpf_timer_init() helper is receiving hidden 'map' and 'prog' arguments
> supplied by the verifier. The prog pointer is needed to do refcnting of bpf
> program to make sure that program doesn't get freed while timer is armed.
>
> The bpf_map_delete_elem() and bpf_map_update_elem() operations cancel
> and free the timer if given map element had it allocated.
> "bpftool map update" command can be used to cancel timers.
>
> Signed-off-by: Alexei Starovoitov <ast@...nel.org>
> ---
> include/linux/bpf.h | 2 +
> include/uapi/linux/bpf.h | 40 ++++++
> kernel/bpf/helpers.c | 227 +++++++++++++++++++++++++++++++++
> kernel/bpf/verifier.c | 109 ++++++++++++++++
> kernel/trace/bpf_trace.c | 2 +-
> scripts/bpf_doc.py | 2 +
> tools/include/uapi/linux/bpf.h | 40 ++++++
> 7 files changed, 421 insertions(+), 1 deletion(-)
>
[...]
>
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 2c1ba70abbf1..d25bbcdad8e6 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -4778,6 +4778,38 @@ union bpf_attr {
> * Execute close syscall for given FD.
> * Return
> * A syscall result.
> + *
> + * long bpf_timer_init(struct bpf_timer *timer, void *callback_fn, int flags)
> + * Description
> + * Initialize the timer to call *callback_fn* static function.
> + * First 4 bits of *flags* specify clockid. Only CLOCK_MONOTONIC,
> + * CLOCK_REALTIME, CLOCK_BOOTTIME are allowed.
> + * All other bits of *flags* are reserved.
> + * Return
> + * 0 on success.
> + * **-EBUSY** if *timer* is already initialized.
> + * **-EINVAL** if invalid *flags* are passed.
> + *
> + * long bpf_timer_start(struct bpf_timer *timer, u64 nsecs)
> + * Description
> + * Start the timer and set its expiration N nanoseconds from the
> + * current time. The timer callback_fn will be invoked in soft irq
> + * context on some cpu and will not repeat unless another
> + * bpf_timer_start() is made. In such case the next invocation can
> + * migrate to a different cpu.
> + * Return
> + * 0 on success.
> + * **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier.
> + *
> + * long bpf_timer_cancel(struct bpf_timer *timer)
> + * Description
> + * Cancel the timer and wait for callback_fn to finish if it was running.
> + * Return
> + * 0 if the timer was not active.
> + * 1 if the timer was active.
> + * **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier.
> + * **-EDEADLK** if callback_fn tried to call bpf_timer_cancel() on its own timer
> + * which would have led to a deadlock otherwise.
> */
> #define __BPF_FUNC_MAPPER(FN) \
> FN(unspec), \
> @@ -4949,6 +4981,9 @@ union bpf_attr {
> FN(sys_bpf), \
> FN(btf_find_by_name_kind), \
> FN(sys_close), \
> + FN(timer_init), \
> + FN(timer_start), \
> + FN(timer_cancel), \
> /* */
>
> /* integer value in 'imm' field of BPF_CALL instruction selects which helper
> @@ -6061,6 +6096,11 @@ struct bpf_spin_lock {
> __u32 val;
> };
>
> +struct bpf_timer {
> + __u64 :64;
> + __u64 :64;
> +};
> +
> struct bpf_sysctl {
> __u32 write; /* Sysctl is being read (= 0) or written (= 1).
> * Allows 1,2,4-byte read, but no write.
> diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> index 544773970dbc..3a693d451ca3 100644
> --- a/kernel/bpf/helpers.c
> +++ b/kernel/bpf/helpers.c
> @@ -985,6 +985,227 @@ const struct bpf_func_proto bpf_snprintf_proto = {
> .arg5_type = ARG_CONST_SIZE_OR_ZERO,
> };
>
> +struct bpf_hrtimer {
> + struct hrtimer timer;
> + struct bpf_map *map;
> + struct bpf_prog *prog;
> + void *callback_fn;
> + void *value;
> +};
> +
> +/* the actual struct hidden inside uapi struct bpf_timer */
> +struct bpf_timer_kern {
> + struct bpf_hrtimer *timer;
> + struct bpf_spin_lock lock;
> +};
Looks like on a 32-bit system, struct bpf_timer_kern is 64 bits (8 bytes)
while struct bpf_timer is 128 bits (16 bytes).
struct bpf_spin_lock {
__u32 val;
};
struct bpf_timer {
__u64 :64;
__u64 :64;
};
Checking the code, we may not have any issues, since struct
"bpf_timer" is only used to reserve space and
the map copy-value routine handles that properly.
Maybe we can still make it consistent, with the
two fields in bpf_timer_kern mapping to the
two fields in bpf_timer?
struct bpf_timer_kern {
__bpf_md_ptr(struct bpf_hrtimer *, timer);
struct bpf_spin_lock lock;
};
> +
> +static DEFINE_PER_CPU(struct bpf_hrtimer *, hrtimer_running);
> +
> +static enum hrtimer_restart bpf_timer_cb(struct hrtimer *timer)
> +{
> + struct bpf_hrtimer *t = container_of(timer, struct bpf_hrtimer, timer);
> + struct bpf_prog *prog = t->prog;
> + struct bpf_map *map = t->map;
> + void *key;
> + u32 idx;
> + int ret;
> +
> + /* bpf_timer_cb() runs in hrtimer_run_softirq. It doesn't migrate and
> + * cannot be preempted by another bpf_timer_cb() on the same cpu.
> + * Remember the timer this callback is servicing to prevent
> + * deadlock if callback_fn() calls bpf_timer_cancel() on the same timer.
> + */
> + this_cpu_write(hrtimer_running, t);
> + if (map->map_type == BPF_MAP_TYPE_ARRAY) {
> + struct bpf_array *array = container_of(map, struct bpf_array, map);
> +
> + /* compute the key */
> + idx = ((char *)t->value - array->value) / array->elem_size;
> + key = &idx;
> + } else { /* hash or lru */
> + key = t->value - round_up(map->key_size, 8);
> + }
> +
> + ret = BPF_CAST_CALL(t->callback_fn)((u64)(long)map,
> + (u64)(long)key,
> + (u64)(long)t->value, 0, 0);
> + WARN_ON(ret != 0); /* Next patch disallows 1 in the verifier */
> +
> + /* The bpf function finished executing. Drop the prog refcnt.
> + * It could reach zero here and trigger free of bpf_prog
> + * and subsequent free of the maps that were holding timers.
> + * If callback_fn called bpf_timer_start on this timer
> + * the prog refcnt will be > 0.
> + *
> + * If callback_fn deleted map element the 't' could have been freed,
> + * hence t->prog deref is done earlier.
> + */
> + bpf_prog_put(prog);
> + this_cpu_write(hrtimer_running, NULL);
> + return HRTIMER_NORESTART;
> +}
> +
> +BPF_CALL_5(bpf_timer_init, struct bpf_timer_kern *, timer, void *, cb, int, flags,
> + struct bpf_map *, map, struct bpf_prog *, prog)
> +{
[...]
Powered by blists - more mailing lists