Message-ID: <20200113094310.GE35080@krava>
Date: Mon, 13 Jan 2020 10:43:10 +0100
From: Jiri Olsa <jolsa@...hat.com>
To: Björn Töpel <bjorn.topel@...el.com>
Cc: Alexei Starovoitov <alexei.starovoitov@...il.com>,
Jiri Olsa <jolsa@...nel.org>,
Alexei Starovoitov <ast@...nel.org>,
Daniel Borkmann <daniel@...earbox.net>, netdev@...r.kernel.org,
bpf@...r.kernel.org, Andrii Nakryiko <andriin@...com>,
Yonghong Song <yhs@...com>, Martin KaFai Lau <kafai@...com>,
Jakub Kicinski <jakub.kicinski@...ronome.com>,
David Miller <davem@...hat.com>
Subject: Re: [PATCH 5/5] bpf: Allow to resolve bpf trampoline in unwind
On Tue, Jan 07, 2020 at 02:30:35PM +0100, Björn Töpel wrote:
> On 2020-01-07 14:05, Jiri Olsa wrote:
> > On Mon, Jan 06, 2020 at 03:46:40PM -0800, Alexei Starovoitov wrote:
> > > On Sun, Dec 29, 2019 at 03:37:40PM +0100, Jiri Olsa wrote:
> > > > When unwinding the stack we need to identify each
> > > > address to successfully continue. Adding latch tree
> > > > to keep trampolines for quick lookup during the
> > > > unwind.
> > > >
> > > > Signed-off-by: Jiri Olsa <jolsa@...nel.org>
> > > ...
> > > > +bool is_bpf_trampoline(void *addr)
> > > > +{
> > > > +	return latch_tree_find(addr, &tree, &tree_ops) != NULL;
> > > > +}
> > > > +
> > > >  struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
> > > >  {
> > > >  	struct bpf_trampoline *tr;
> > > > @@ -65,6 +98,7 @@ struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
> > > >  	for (i = 0; i < BPF_TRAMP_MAX; i++)
> > > >  		INIT_HLIST_HEAD(&tr->progs_hlist[i]);
> > > >  	tr->image = image;
> > > > +	latch_tree_insert(&tr->tnode, &tree, &tree_ops);
> > >
> > > Thanks for the fix. I was thinking to apply it, but then realized that bpf
> > > dispatcher logic has the same issue.
> > > Could you generalize the fix for both?
> > > May be bpf_jit_alloc_exec_page() can do latch_tree_insert() ?
> > > and new version of bpf_jit_free_exec() is needed that will do latch_tree_erase().
> > > Wdyt?
> >
> > I need to check the dispatcher code, but seems ok.. will check
> >
>
> Thanks Jiri! The trampoline and dispatcher share the image allocation, so
> putting it there would make sense.
>
> It's annoying that the dispatcher doesn't show up correctly in perf, and
> it's been on my list to fix that. Hopefully you beat me to it! :-D
hi,
the patch below seems to work for me (the trampoline use case), but I don't know
how to test it for the dispatcher.. also I need to check whether we need to decrease
BPF_TRAMP_MAX or BPF_DISPATCHER_MAX, so it might take more time ;-)
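fwiw, a rough back-of-the-envelope check for the trampoline side (assuming
x86_64 with 4k pages, where struct latch_tree_node is two struct rb_node,
i.e. 48 bytes): BPF_IMAGE_SIZE ends up as 4096 - 48 = 4048, so each half is
2024 bytes, and BPF_MAX_TRAMP_PROGS (40) at ~50 bytes per program is ~2000
bytes, which still fits, but only barely. something like this hypothetical
guard (not in the patch below), placed e.g. in bpf_trampoline_update(), would
catch it at build time; the dispatcher side would need a similar check against
BPF_DISPATCHER_MAX:

	/* hypothetical build-time check, not part of the patch below:
	 * each half of the image page must hold BPF_MAX_TRAMP_PROGS
	 * calls at ~50 bytes each
	 */
	BUILD_BUG_ON(BPF_MAX_TRAMP_PROGS * 50 > BPF_IMAGE_SIZE / 2);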
jirka
---
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index aed2bc39d72b..e0ca8780dc7a 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -510,7 +510,6 @@ struct bpf_trampoline *bpf_trampoline_lookup(u64 key);
 int bpf_trampoline_link_prog(struct bpf_prog *prog);
 int bpf_trampoline_unlink_prog(struct bpf_prog *prog);
 void bpf_trampoline_put(struct bpf_trampoline *tr);
-void *bpf_jit_alloc_exec_page(void);
 #define BPF_DISPATCHER_INIT(name) {			\
 	.mutex = __MUTEX_INITIALIZER(name.mutex),	\
 	.func = &name##func,				\
@@ -542,6 +541,13 @@ void *bpf_jit_alloc_exec_page(void);
 #define BPF_DISPATCHER_PTR(name) (&name)
 void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from,
 				struct bpf_prog *to);
+struct bpf_image {
+	struct latch_tree_node tnode;
+	unsigned char data[];
+};
+#define BPF_IMAGE_SIZE (PAGE_SIZE - sizeof(struct bpf_image))
+bool is_bpf_image(void *addr);
+void *bpf_image_alloc(void);
 #else
 static inline struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
 {
@@ -563,6 +569,10 @@ static inline void bpf_trampoline_put(struct bpf_trampoline *tr) {}
 static inline void bpf_dispatcher_change_prog(struct bpf_dispatcher *d,
					      struct bpf_prog *from,
					      struct bpf_prog *to) {}
+static inline bool is_bpf_image(void *addr)
+{
+	return false;
+}
 #endif
 
 struct bpf_func_info_aux {
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 29d47aae0dd1..53dc3adf6077 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -704,6 +704,8 @@ bool is_bpf_text_address(unsigned long addr)
 
 	rcu_read_lock();
 	ret = bpf_prog_kallsyms_find(addr) != NULL;
+	if (!ret)
+		ret = is_bpf_image((void *) addr);
 	rcu_read_unlock();
 
 	return ret;
diff --git a/kernel/bpf/dispatcher.c b/kernel/bpf/dispatcher.c
index 204ee61a3904..b3e5b214fed8 100644
--- a/kernel/bpf/dispatcher.c
+++ b/kernel/bpf/dispatcher.c
@@ -113,7 +113,7 @@ static void bpf_dispatcher_update(struct bpf_dispatcher *d, int prev_num_progs)
 		noff = 0;
 	} else {
 		old = d->image + d->image_off;
-		noff = d->image_off ^ (PAGE_SIZE / 2);
+		noff = d->image_off ^ (BPF_IMAGE_SIZE / 2);
 	}
 
 	new = d->num_progs ? d->image + noff : NULL;
@@ -140,7 +140,7 @@ void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from,
 	mutex_lock(&d->mutex);
 	if (!d->image) {
-		d->image = bpf_jit_alloc_exec_page();
+		d->image = bpf_image_alloc();
 		if (!d->image)
			goto out;
 	}
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index 79a04417050d..3ea56f89c68a 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -4,6 +4,7 @@
 #include <linux/bpf.h>
 #include <linux/filter.h>
 #include <linux/ftrace.h>
+#include <linux/rbtree_latch.h>
 
 /* btf_vmlinux has ~22k attachable functions. 1k htab is enough. */
 #define TRAMPOLINE_HASH_BITS 10
@@ -14,7 +15,12 @@ static struct hlist_head trampoline_table[TRAMPOLINE_TABLE_SIZE];
 /* serializes access to trampoline_table */
 static DEFINE_MUTEX(trampoline_mutex);
 
-void *bpf_jit_alloc_exec_page(void)
+static struct latch_tree_root image_tree __cacheline_aligned;
+
+/* serializes access to image_tree */
+static DEFINE_MUTEX(image_mutex);
+
+static void *bpf_jit_alloc_exec_page(void)
 {
 	void *image;
 
@@ -30,6 +36,62 @@ void *bpf_jit_alloc_exec_page(void)
 	return image;
 }
 
+static __always_inline bool image_tree_less(struct latch_tree_node *a,
+					    struct latch_tree_node *b)
+{
+	struct bpf_image *ia = container_of(a, struct bpf_image, tnode);
+	struct bpf_image *ib = container_of(b, struct bpf_image, tnode);
+
+	return ia < ib;
+}
+
+static __always_inline int image_tree_comp(void *addr, struct latch_tree_node *n)
+{
+	void *image = container_of(n, struct bpf_image, tnode);
+
+	if (addr < image)
+		return -1;
+	if (addr >= image + PAGE_SIZE)
+		return 1;
+
+	return 0;
+}
+
+static const struct latch_tree_ops image_tree_ops = {
+	.less = image_tree_less,
+	.comp = image_tree_comp,
+};
+
+void *bpf_image_alloc(void)
+{
+	struct bpf_image *image;
+
+	image = bpf_jit_alloc_exec_page();
+	if (!image)
+		return NULL;
+
+	mutex_lock(&image_mutex);
+	latch_tree_insert(&image->tnode, &image_tree, &image_tree_ops);
+	mutex_unlock(&image_mutex);
+	return image->data;
+}
+
+void bpf_image_delete(void *ptr)
+{
+	struct bpf_image *image = container_of(ptr, struct bpf_image, data);
+
+	mutex_lock(&image_mutex);
+	latch_tree_erase(&image->tnode, &image_tree, &image_tree_ops);
+	synchronize_rcu();
+	bpf_jit_free_exec(image);
+	mutex_unlock(&image_mutex);
+}
+
+bool is_bpf_image(void *addr)
+{
+	return latch_tree_find(addr, &image_tree, &image_tree_ops) != NULL;
+}
+
 struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
 {
 	struct bpf_trampoline *tr;
@@ -50,7 +112,7 @@ struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
 		goto out;
 
 	/* is_root was checked earlier. No need for bpf_jit_charge_modmem() */
-	image = bpf_jit_alloc_exec_page();
+	image = bpf_image_alloc();
 	if (!image) {
 		kfree(tr);
 		tr = NULL;
@@ -125,14 +187,14 @@ static int register_fentry(struct bpf_trampoline *tr, void *new_addr)
 }
 
 /* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50
- * bytes on x86. Pick a number to fit into PAGE_SIZE / 2
+ * bytes on x86. Pick a number to fit into BPF_IMAGE_SIZE / 2
  */
 #define BPF_MAX_TRAMP_PROGS 40
 
 static int bpf_trampoline_update(struct bpf_trampoline *tr)
 {
-	void *old_image = tr->image + ((tr->selector + 1) & 1) * PAGE_SIZE/2;
-	void *new_image = tr->image + (tr->selector & 1) * PAGE_SIZE/2;
+	void *old_image = tr->image + ((tr->selector + 1) & 1) * BPF_IMAGE_SIZE/2;
+	void *new_image = tr->image + (tr->selector & 1) * BPF_IMAGE_SIZE/2;
 	struct bpf_prog *progs_to_run[BPF_MAX_TRAMP_PROGS];
 	int fentry_cnt = tr->progs_cnt[BPF_TRAMP_FENTRY];
 	int fexit_cnt = tr->progs_cnt[BPF_TRAMP_FEXIT];
@@ -160,7 +222,7 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr)
 	if (fexit_cnt)
 		flags = BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_SKIP_FRAME;
 
-	err = arch_prepare_bpf_trampoline(new_image, new_image + PAGE_SIZE / 2,
+	err = arch_prepare_bpf_trampoline(new_image, new_image + BPF_IMAGE_SIZE / 2,
 					  &tr->func.model, flags,
 					  fentry, fentry_cnt,
 					  fexit, fexit_cnt,
@@ -251,7 +313,7 @@ void bpf_trampoline_put(struct bpf_trampoline *tr)
 		goto out;
 	if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FEXIT])))
 		goto out;
-	bpf_jit_free_exec(tr->image);
+	bpf_image_delete(tr->image);
 	hlist_del(&tr->hlist);
 	kfree(tr);
 out:
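For context (not part of the patch): the unwinder reaches this through
kernel_text_address(), which already falls back to is_bpf_text_address(), so
with the kernel/bpf/core.c hunk above an address inside a trampoline or
dispatcher image page is now reported as valid kernel text. A simplified
sketch of that existing consumer path, trimmed down from kernel/extable.c
for illustration:

	/* simplified sketch of the existing consumer path (kernel/extable.c),
	 * trimmed down for illustration; the real function also handles RCU
	 * and kprobe insn slots
	 */
	int kernel_text_address(unsigned long addr)
	{
		if (core_kernel_text(addr))
			return 1;
		if (is_module_text_address(addr))
			return 1;
		if (is_ftrace_trampoline(addr))
			return 1;
		if (is_bpf_text_address(addr))
			return 1;
		return 0;
	}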