>From c11bf0aa23f1df25682056f2c581c9bc9bd8df31 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 16 Jun 2021 09:19:36 -0700 Subject: [PATCH bpf-next 1/2] bpf: Cancel and free timers when prog is going away. Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 3 ++ kernel/bpf/core.c | 3 ++ kernel/bpf/helpers.c | 70 +++++++++++++++++++++++++------------------- 3 files changed, 46 insertions(+), 30 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 7c403235c7e8..f67ea2512844 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -245,6 +245,7 @@ static inline void copy_map_value(struct bpf_map *map, void *dst, void *src) void copy_map_value_locked(struct bpf_map *map, void *dst, void *src, bool lock_src); void bpf_timer_cancel_and_free(void *timer); +void bpf_free_used_timers(struct bpf_prog_aux *aux); int bpf_obj_name_cpy(char *dst, const char *src, unsigned int size); struct bpf_offload_dev; @@ -871,6 +872,8 @@ struct bpf_prog_aux { u32 size_poke_tab; struct bpf_ksym ksym; const struct bpf_prog_ops *ops; + spinlock_t timers_lock; + struct hlist_head used_timers; struct bpf_map **used_maps; struct mutex used_maps_mutex; /* mutex for used_maps and used_map_cnt */ struct btf_mod_pair *used_btfs; diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 5e31ee9f7512..aa7960986a75 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -104,6 +104,8 @@ struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flag fp->jit_requested = ebpf_jit_enabled(); INIT_LIST_HEAD_RCU(&fp->aux->ksym.lnode); + INIT_HLIST_HEAD(&fp->aux->used_timers); + spin_lock_init(&fp->aux->timers_lock); mutex_init(&fp->aux->used_maps_mutex); mutex_init(&fp->aux->dst_mutex); @@ -2201,6 +2203,7 @@ static void bpf_prog_free_deferred(struct work_struct *work) int i; aux = container_of(work, struct bpf_prog_aux, work); + bpf_free_used_timers(aux); bpf_free_used_maps(aux); bpf_free_used_btfs(aux); if (bpf_prog_is_dev_bound(aux)) diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index b8df592c33cc..08f5d0f73f68 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -987,6 +987,7 @@ const struct bpf_func_proto bpf_snprintf_proto = { struct bpf_hrtimer { struct hrtimer timer; + struct hlist_node hlist; struct bpf_map *map; struct bpf_prog *prog; void *callback_fn; @@ -1004,7 +1005,6 @@ static DEFINE_PER_CPU(struct bpf_hrtimer *, hrtimer_running); static enum hrtimer_restart bpf_timer_cb(struct hrtimer *timer) { struct bpf_hrtimer *t = container_of(timer, struct bpf_hrtimer, timer); - struct bpf_prog *prog = t->prog; struct bpf_map *map = t->map; void *key; u32 idx; @@ -1031,16 +1031,6 @@ static enum hrtimer_restart bpf_timer_cb(struct hrtimer *timer) (u64)(long)t->value, 0, 0); WARN_ON(ret != 0); /* Next patch disallows 1 in the verifier */ - /* The bpf function finished executed. Drop the prog refcnt. - * It could reach zero here and trigger free of bpf_prog - * and subsequent free of the maps that were holding timers. - * If callback_fn called bpf_timer_start on this timer - * the prog refcnt will be > 0. - * - * If callback_fn deleted map element the 't' could have been freed, - * hence t->prog deref is done earlier. - */ - bpf_prog_put(prog); this_cpu_write(hrtimer_running, NULL); return HRTIMER_NORESTART; } @@ -1077,6 +1067,10 @@ BPF_CALL_5(bpf_timer_init, struct bpf_timer_kern *, timer, void *, cb, int, flag t->prog = prog; hrtimer_init(&t->timer, clockid, HRTIMER_MODE_REL_SOFT); t->timer.function = bpf_timer_cb; + INIT_HLIST_NODE(&t->hlist); + spin_lock(&prog->aux->timers_lock); + hlist_add_head_rcu(&t->hlist, &prog->aux->used_timers); + spin_unlock(&prog->aux->timers_lock); timer->timer = t; out: ____bpf_spin_unlock(&timer->lock); @@ -1103,12 +1097,6 @@ BPF_CALL_2(bpf_timer_start, struct bpf_timer_kern *, timer, u64, nsecs) ret = -EINVAL; goto out; } - if (!hrtimer_active(&t->timer) || hrtimer_callback_running(&t->timer)) - /* If the timer wasn't active or callback already executing - * bump the prog refcnt to keep it alive until - * callback is invoked (again). - */ - bpf_prog_inc(t->prog); hrtimer_start(&t->timer, ns_to_ktime(nsecs), HRTIMER_MODE_REL_SOFT); out: ____bpf_spin_unlock(&timer->lock); @@ -1145,13 +1133,7 @@ BPF_CALL_1(bpf_timer_cancel, struct bpf_timer_kern *, timer) /* Cancel the timer and wait for associated callback to finish * if it was running. */ - if (hrtimer_cancel(&t->timer) == 1) { - /* If the timer was active then drop the prog refcnt, - * since callback will not be invoked. - */ - bpf_prog_put(t->prog); - ret = 1; - } + ret = hrtimer_cancel(&t->timer); out: ____bpf_spin_unlock(&timer->lock); return ret; @@ -1164,8 +1146,10 @@ static const struct bpf_func_proto bpf_timer_cancel_proto = { .arg1_type = ARG_PTR_TO_TIMER, }; -/* This function is called by delete_element in htab and lru maps - * and by map_free for array, lru, htab maps. +/* This function is called by map_delete/update_elem for individual + * element and by bpf_free_used_timers when prog is going away. + * When map is destroyed by ops->map_free all bpf_timers in there + * are freed. */ void bpf_timer_cancel_and_free(void *val) { @@ -1177,7 +1161,7 @@ void bpf_timer_cancel_and_free(void *val) if (!t) goto out; /* Cancel the timer and wait for callback to complete if it was - * running. Only individual delete_element in htab or lru maps can + * running. Only delete/update of individual element can * return 1 from hrtimer_cancel. * The whole map is destroyed when its refcnt reaches zero. * That happens after bpf prog refcnt reaches zero. @@ -1197,15 +1181,41 @@ void bpf_timer_cancel_and_free(void *val) * In non-preallocated maps timer->timer = NULL will happen after * callback completes, since prog execution is an RCU critical section. */ - if (this_cpu_read(hrtimer_running) != t && - hrtimer_cancel(&t->timer) == 1) - bpf_prog_put(t->prog); + if (this_cpu_read(hrtimer_running) != t) + hrtimer_cancel(&t->timer); + + spin_lock(&t->prog->aux->timers_lock); + hlist_del_rcu(&t->hlist); + spin_unlock(&t->prog->aux->timers_lock); + t->prog = LIST_POISON1; kfree(t); timer->timer = NULL; out: ____bpf_spin_unlock(&timer->lock); } +/* This function is called after prog->refcnt reaches zero. + * It's called before bpf_free_used_maps to clean up timers in maps + * if going away prog had callback_fn-s for them. + */ +void bpf_free_used_timers(struct bpf_prog_aux *aux) +{ + struct bpf_timer_kern *timer; + struct bpf_hrtimer *t; + struct hlist_node *n; + + rcu_read_lock(); + hlist_for_each_entry_safe(t, n, &aux->used_timers, hlist) { + timer = t->value + t->map->timer_off; + /* The map isn't going away. The 'timer' points into map + * element that isn't going away either, but cancel_and_free + * could be racing with parallel map_delete_elem. + */ + bpf_timer_cancel_and_free(timer); + } + rcu_read_unlock(); +} + const struct bpf_func_proto bpf_get_current_task_proto __weak; const struct bpf_func_proto bpf_probe_read_user_proto __weak; const struct bpf_func_proto bpf_probe_read_user_str_proto __weak; -- 2.30.2