linux-kernel - Re: [PATCH bpf-next v2] bpf: support raw tracepoints in modules

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <7fcf04f0f46d4a1df98687312bfc607dd7fffb1c.camel@fb.com>
Date:   Thu, 13 Dec 2018 19:38:51 +0000
From:   Matt Mullins <mmullins@...com>
To:     Martin Lau <kafai@...com>
CC:     "linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
        "daniel@...earbox.net" <daniel@...earbox.net>,
        "jeyu@...nel.org" <jeyu@...nel.org>,
        "rostedt@...dmis.org" <rostedt@...dmis.org>,
        "ast@...nel.org" <ast@...nel.org>,
        Kernel Team <Kernel-team@...com>,
        "mingo@...hat.com" <mingo@...hat.com>,
        "netdev@...r.kernel.org" <netdev@...r.kernel.org>
Subject: Re: [PATCH bpf-next v2] bpf: support raw tracepoints in modules

On Thu, 2018-12-13 at 19:22 +0000, Martin Lau wrote:
> On Wed, Dec 12, 2018 at 04:42:37PM -0800, Matt Mullins wrote:
> > Distributions build drivers as modules, including network and filesystem
> > drivers which export numerous tracepoints.  This enables
> > bpf(BPF_RAW_TRACEPOINT_OPEN) to attach to those tracepoints.
> > 
> > Signed-off-by: Matt Mullins <mmullins@...com>
> > ---
> > v1->v2:
> >   * avoid taking the mutex in bpf_event_notify when op is neither COMING nor
> >     GOING.
> >   * check that kzalloc actually succeeded
> > 
> > I didn't try to check list_empty before taking the mutex since I want to avoid
> > races between bpf_event_notify and bpf_get_raw_tracepoint.  Additionally,
> > list_for_each_entry_safe is not strictly necessary upon MODULE_STATE_GOING, but
> > Alexei suggested I use it to protect against fragility if the subsequent break;
> > eventually disappears.
> > 
> >  include/linux/module.h       |  4 ++
> >  include/linux/trace_events.h |  8 ++-
> >  kernel/bpf/syscall.c         | 11 ++--
> >  kernel/module.c              |  5 ++
> >  kernel/trace/bpf_trace.c     | 99 +++++++++++++++++++++++++++++++++++-
> >  5 files changed, 120 insertions(+), 7 deletions(-)
> > 
> > diff --git a/include/linux/module.h b/include/linux/module.h
> > index fce6b4335e36..5f147dd5e709 100644
> > --- a/include/linux/module.h
> > +++ b/include/linux/module.h
> > @@ -432,6 +432,10 @@ struct module {
> >  	unsigned int num_tracepoints;
> >  	tracepoint_ptr_t *tracepoints_ptrs;
> >  #endif
> > +#ifdef CONFIG_BPF_EVENTS
> > +	unsigned int num_bpf_raw_events;
> > +	struct bpf_raw_event_map *bpf_raw_events;
> > +#endif
> >  #ifdef HAVE_JUMP_LABEL
> >  	struct jump_entry *jump_entries;
> >  	unsigned int num_jump_entries;
> > diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
> > index 4130a5497d40..8a62731673f7 100644
> > --- a/include/linux/trace_events.h
> > +++ b/include/linux/trace_events.h
> > @@ -471,7 +471,8 @@ void perf_event_detach_bpf_prog(struct perf_event *event);
> >  int perf_event_query_prog_array(struct perf_event *event, void __user *info);
> >  int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog);
> >  int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog);
> > -struct bpf_raw_event_map *bpf_find_raw_tracepoint(const char *name);
> > +struct bpf_raw_event_map *bpf_get_raw_tracepoint(const char *name);
> > +void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp);
> >  int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
> >  			    u32 *fd_type, const char **buf,
> >  			    u64 *probe_offset, u64 *probe_addr);
> > @@ -502,10 +503,13 @@ static inline int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf
> >  {
> >  	return -EOPNOTSUPP;
> >  }
> > -static inline struct bpf_raw_event_map *bpf_find_raw_tracepoint(const char *name)
> > +static inline struct bpf_raw_event_map *bpf_get_raw_tracepoint(const char *name)
> >  {
> >  	return NULL;
> >  }
> > +static inline void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp)
> > +{
> > +}
> >  static inline int bpf_get_perf_event_info(const struct perf_event *event,
> >  					  u32 *prog_id, u32 *fd_type,
> >  					  const char **buf, u64 *probe_offset,
> > diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> > index 70fb11106fc2..754370e3155e 100644
> > --- a/kernel/bpf/syscall.c
> > +++ b/kernel/bpf/syscall.c
> > @@ -1609,6 +1609,7 @@ static int bpf_raw_tracepoint_release(struct inode *inode, struct file *filp)
> >  		bpf_probe_unregister(raw_tp->btp, raw_tp->prog);
> >  		bpf_prog_put(raw_tp->prog);
> >  	}
> > +	bpf_put_raw_tracepoint(raw_tp->btp);
> >  	kfree(raw_tp);
> >  	return 0;
> >  }
> > @@ -1634,13 +1635,15 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
> >  		return -EFAULT;
> >  	tp_name[sizeof(tp_name) - 1] = 0;
> >  
> > -	btp = bpf_find_raw_tracepoint(tp_name);
> > +	btp = bpf_get_raw_tracepoint(tp_name);
> >  	if (!btp)
> >  		return -ENOENT;
> >  
> >  	raw_tp = kzalloc(sizeof(*raw_tp), GFP_USER);
> > -	if (!raw_tp)
> > -		return -ENOMEM;
> > +	if (!raw_tp) {
> > +		err = -ENOMEM;
> > +		goto out_put_btp;
> > +	}
> >  	raw_tp->btp = btp;
> >  
> >  	prog = bpf_prog_get_type(attr->raw_tracepoint.prog_fd,
> > @@ -1668,6 +1671,8 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
> >  	bpf_prog_put(prog);
> >  out_free_tp:
> >  	kfree(raw_tp);
> > +out_put_btp:
> > +	bpf_put_raw_tracepoint(btp);
> >  	return err;
> >  }
> >  
> > diff --git a/kernel/module.c b/kernel/module.c
> > index 49a405891587..06ec68f08387 100644
> > --- a/kernel/module.c
> > +++ b/kernel/module.c
> > @@ -3093,6 +3093,11 @@ static int find_module_sections(struct module *mod, struct load_info *info)
> >  					     sizeof(*mod->tracepoints_ptrs),
> >  					     &mod->num_tracepoints);
> >  #endif
> > +#ifdef CONFIG_BPF_EVENTS
> > +	mod->bpf_raw_events = section_objs(info, "__bpf_raw_tp_map",
> > +					   sizeof(*mod->bpf_raw_events),
> > +					   &mod->num_bpf_raw_events);
> > +#endif
> >  #ifdef HAVE_JUMP_LABEL
> >  	mod->jump_entries = section_objs(info, "__jump_table",
> >  					sizeof(*mod->jump_entries),
> > diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> > index 9864a35c8bb5..9ddb6fddb4e0 100644
> > --- a/kernel/trace/bpf_trace.c
> > +++ b/kernel/trace/bpf_trace.c
> > @@ -17,6 +17,43 @@
> >  #include "trace_probe.h"
> >  #include "trace.h"
> >  
> > +#ifdef CONFIG_MODULES
> > +struct bpf_trace_module {
> > +	struct module *module;
> > +	struct list_head list;
> > +};
> > +
> > +static LIST_HEAD(bpf_trace_modules);
> > +static DEFINE_MUTEX(bpf_module_mutex);
> > +
> > +static struct bpf_raw_event_map *bpf_get_raw_tracepoint_module(const char *name)
> > +{
> > +	struct bpf_raw_event_map *btp, *ret = NULL;
> > +	struct bpf_trace_module *btm;
> > +	unsigned int i;
> > +
> > +	mutex_lock(&bpf_module_mutex);
> > +	list_for_each_entry(btm, &bpf_trace_modules, list) {
> > +		for (i = 0; i < btm->module->num_bpf_raw_events; ++i) {
> > +			btp = &btm->module->bpf_raw_events[i];
> > +			if (!strcmp(btp->tp->name, name)) {
> > +				if (try_module_get(btm->module))
> > +					ret = btp;
> > +				goto out;
> > +			}
> > +		}
> > +	}
> > +out:
> > +	mutex_unlock(&bpf_module_mutex);
> > +	return ret;
> > +}
> > +#else
> > +static struct bpf_raw_event_map *bpf_get_raw_tracepoint_module(const char *name)
> > +{
> > +	return NULL;
> > +}
> > +#endif /* CONFIG_MODULES */
> > +
> >  u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
> >  u64 bpf_get_stack(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
> >  
> > @@ -1076,7 +1113,7 @@ int perf_event_query_prog_array(struct perf_event *event, void __user *info)
> >  extern struct bpf_raw_event_map __start__bpf_raw_tp[];
> >  extern struct bpf_raw_event_map __stop__bpf_raw_tp[];
> >  
> > -struct bpf_raw_event_map *bpf_find_raw_tracepoint(const char *name)
> > +struct bpf_raw_event_map *bpf_get_raw_tracepoint(const char *name)
> >  {
> >  	struct bpf_raw_event_map *btp = __start__bpf_raw_tp;
> >  
> > @@ -1084,7 +1121,16 @@ struct bpf_raw_event_map *bpf_find_raw_tracepoint(const char *name)
> >  		if (!strcmp(btp->tp->name, name))
> >  			return btp;
> >  	}
> > -	return NULL;
> > +
> > +	return bpf_get_raw_tracepoint_module(name);
> > +}
> > +
> > +void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp)
> > +{
> > +	struct module *mod = __module_address((unsigned long)btp);
> > +
> > +	if (mod)
> > +		module_put(mod);
> >  }
> >  
> >  static __always_inline
> > @@ -1222,3 +1268,52 @@ int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
> >  
> >  	return err;
> >  }
> > +
> > +#ifdef CONFIG_MODULES
> > +int bpf_event_notify(struct notifier_block *nb, unsigned long op, void *module)
> > +{
> > +	struct bpf_trace_module *btm, *tmp;
> > +	struct module *mod = module;
> > +
> > +	if (mod->num_bpf_raw_events == 0 ||
> > +	    (op != MODULE_STATE_COMING && op != MODULE_STATE_GOING))
> > +		return 0;
> > +
> > +	mutex_lock(&bpf_module_mutex);
> > +
> > +	switch (op) {
> > +	case MODULE_STATE_COMING:
> > +		btm = kzalloc(sizeof(*btm), GFP_KERNEL);
> > +		if (btm) {
> > +			btm->module = module;
> > +			list_add(&btm->list, &bpf_trace_modules);
> > +		}
> 
> Is it fine to return 0 on !btm case?

That effectively just means we'll be ignoring tracepoints for a module
that is loaded while we can't allocate a bpf_trace_module (24 bytes) to
track it.  That feels like reasonable behavior to me.

> 
> Other looks good.
> 
> > +		break;
> > +	case MODULE_STATE_GOING:
> > +		list_for_each_entry_safe(btm, tmp, &bpf_trace_modules, list) {
> > +			if (btm->module == module) {
> > +				list_del(&btm->list);
> > +				kfree(btm);
> > +				break;
> > +			}
> > +		}
> > +		break;
> > +	}
> > +
> > +	mutex_unlock(&bpf_module_mutex);
> > +
> > +	return 0;
> > +}
> > +
> > +static struct notifier_block bpf_module_nb = {
> > +	.notifier_call = bpf_event_notify,
> > +};
> > +
> > +int __init bpf_event_init(void)
> > +{
> > +	register_module_notifier(&bpf_module_nb);
> > +	return 0;
> > +}
> > +
> > +fs_initcall(bpf_event_init);
> > +#endif /* CONFIG_MODULES */
> > -- 
> > 2.17.1
> >