linux-kernel - Re: [PATCH v2 bpf-next 1/4] tracing/probe: Add PERF_EVENT_IOC_QUERY

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAEf4Bzb0jBmsdeKZ_vN4w-z1tM8M2Ygz_CoBoO_2iV55tgL1Bg@mail.gmail.com>
Date:   Mon, 12 Aug 2019 08:56:45 -0700
From:   Andrii Nakryiko <andrii.nakryiko@...il.com>
To:     Daniel Xu <dxu@...uu.xyz>
Cc:     Song Liu <songliubraving@...com>, Yonghong Song <yhs@...com>,
        Andrii Nakryiko <andriin@...com>, peterz@...raded.org,
        Ingo Molnar <mingo@...hat.com>,
        Arnaldo Carvalho de Melo <acme@...nel.org>,
        Alexei Starovoitov <ast@...com>,
        alexander.shishkin@...ux.intel.com, Jiri Olsa <jolsa@...hat.com>,
        Namhyung Kim <namhyung@...nel.org>,
        open list <linux-kernel@...r.kernel.org>
Subject: Re: [PATCH v2 bpf-next 1/4] tracing/probe: Add PERF_EVENT_IOC_QUERY_PROBE
 ioctl

On Fri, Aug 9, 2019 at 2:47 PM Daniel Xu <dxu@...uu.xyz> wrote:
>
> It's useful to know [uk]probe's nmissed and nhit stats. For example with
> tracing tools, it's important to know when events may have been lost.
> debugfs currently exposes a control file to get this information, but
> it is not compatible with probes registered with the perf API.
>
> While bpf programs may be able to manually count nhit, there is no way
> to gather nmissed. In other words, it is currently not possible to
> retrieve information about FD-based probes.
>
> This patch adds a new ioctl that lets users query nmissed (as well as
> nhit for completeness). We currently only add support for [uk]probes
> but leave the possibility open for other probes like tracepoint.
>
> Signed-off-by: Daniel Xu <dxu@...uu.xyz>
> ---
>  include/linux/trace_events.h    | 12 ++++++++++++
>  include/uapi/linux/perf_event.h | 19 +++++++++++++++++++
>  kernel/events/core.c            | 20 ++++++++++++++++++++
>  kernel/trace/trace_kprobe.c     | 23 +++++++++++++++++++++++
>  kernel/trace/trace_uprobe.c     | 23 +++++++++++++++++++++++
>  5 files changed, 97 insertions(+)
>
> diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
> index 5150436783e8..61558f19696a 100644
> --- a/include/linux/trace_events.h
> +++ b/include/linux/trace_events.h
> @@ -586,6 +586,12 @@ extern int bpf_get_kprobe_info(const struct perf_event *event,
>                                u32 *fd_type, const char **symbol,
>                                u64 *probe_offset, u64 *probe_addr,
>                                bool perf_type_tracepoint);
> +extern int perf_kprobe_event_query(struct perf_event *event, void __user *info);
> +#else
> +int perf_kprobe_event_query(struct perf_event *event, void __user *info)
> +{
> +       return -EOPNOTSUPP;
> +}
>  #endif
>  #ifdef CONFIG_UPROBE_EVENTS
>  extern int  perf_uprobe_init(struct perf_event *event,
> @@ -594,6 +600,12 @@ extern void perf_uprobe_destroy(struct perf_event *event);
>  extern int bpf_get_uprobe_info(const struct perf_event *event,
>                                u32 *fd_type, const char **filename,
>                                u64 *probe_offset, bool perf_type_tracepoint);
> +extern int perf_uprobe_event_query(struct perf_event *event, void __user *info);
> +#else
> +int perf_uprobe_event_query(struct perf_event *event, void __user *info)
> +{
> +       return -EOPNOTSUPP;
> +}
>  #endif
>  extern int  ftrace_profile_set_filter(struct perf_event *event, int event_id,
>                                      char *filter_str);
> diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
> index 7198ddd0c6b1..65faa9b2a3b4 100644
> --- a/include/uapi/linux/perf_event.h
> +++ b/include/uapi/linux/perf_event.h
> @@ -447,6 +447,24 @@ struct perf_event_query_bpf {
>         __u32   ids[0];
>  };
>
> +/*
> + * Structure used by below PERF_EVENT_IOC_QUERY_PROBE command
> + * to query information about the probe attached to the perf
> + * event. Currently only supports [uk]probes.
> + */
> +struct perf_event_query_probe {
> +       /*
> +        * Set by the kernel to indicate number of times this probe
> +        * was temporarily disabled
> +        */
> +       __u64   nmissed;
> +       /*
> +        * Set by the kernel to indicate number of times this probe
> +        * was hit
> +        */
> +       __u64   nhit;
> +};
> +
>  /*
>   * Ioctls that can be done on a perf event fd:
>   */
> @@ -462,6 +480,7 @@ struct perf_event_query_bpf {
>  #define PERF_EVENT_IOC_PAUSE_OUTPUT            _IOW('$', 9, __u32)
>  #define PERF_EVENT_IOC_QUERY_BPF               _IOWR('$', 10, struct perf_event_query_bpf *)
>  #define PERF_EVENT_IOC_MODIFY_ATTRIBUTES       _IOW('$', 11, struct perf_event_attr *)
> +#define PERF_EVENT_IOC_QUERY_PROBE             _IOR('$', 12, struct perf_event_query_probe *)
>
>  enum perf_event_ioc_flags {
>         PERF_IOC_FLAG_GROUP             = 1U << 0,
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index 026a14541a38..3e0fe6eaaad0 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -5060,6 +5060,8 @@ static int perf_event_set_filter(struct perf_event *event, void __user *arg);
>  static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd);
>  static int perf_copy_attr(struct perf_event_attr __user *uattr,
>                           struct perf_event_attr *attr);
> +static int perf_probe_event_query(struct perf_event *event,
> +                                   void __user *info);
>
>  static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned long arg)
>  {
> @@ -5143,6 +5145,10 @@ static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned lon
>
>                 return perf_event_modify_attr(event,  &new_attr);
>         }
> +#if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
> +       case PERF_EVENT_IOC_QUERY_PROBE:
> +               return perf_probe_event_query(event, (void __user *)arg);
> +#endif
>         default:
>                 return -ENOTTY;
>         }
> @@ -8833,6 +8839,20 @@ static inline void perf_tp_register(void)
>  #endif
>  }
>
> +static int perf_probe_event_query(struct perf_event *event,
> +                                   void __user *info)
> +{
> +#ifdef CONFIG_KPROBE_EVENTS
> +       if (event->attr.type == perf_kprobe.type)
> +               return perf_kprobe_event_query(event, (void __user *)info);
> +#endif
> +#ifdef CONFIG_UPROBE_EVENTS
> +       if (event->attr.type == perf_uprobe.type)
> +               return perf_uprobe_event_query(event, (void __user *)info);
> +#endif
> +       return -EINVAL;
> +}
> +
>  static void perf_event_free_filter(struct perf_event *event)
>  {
>         ftrace_profile_free_filter(event);
> diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
> index 9d483ad9bb6c..a734c2d506be 100644
> --- a/kernel/trace/trace_kprobe.c
> +++ b/kernel/trace/trace_kprobe.c
> @@ -196,6 +196,29 @@ bool trace_kprobe_error_injectable(struct trace_event_call *call)
>         return within_error_injection_list(trace_kprobe_address(tk));
>  }
>
> +int perf_kprobe_event_query(struct perf_event *event, void __user *info)
> +{
> +       struct perf_event_query_probe __user *uquery = info;
> +       struct perf_event_query_probe query = {};
> +       struct trace_event_call *call = event->tp_event;
> +       struct trace_kprobe *tk = (struct trace_kprobe *)call->data;
> +       u64 nmissed, nhit;
> +
> +       if (!capable(CAP_SYS_ADMIN))
> +               return -EPERM;
> +       if (copy_from_user(&query, uquery, sizeof(query)))

What about forward/backward compatibility? Didn't you have a size
field for perf_event_query_probe?

> +               return -EFAULT;
> +
> +       nhit = trace_kprobe_nhit(tk);
> +       nmissed = tk->rp.kp.nmissed;
> +
> +       if (put_user(nmissed, &uquery->nmissed) ||
> +           put_user(nhit, &uquery->nhit))

Wouldn't it be nicer to just do one user put for the entire struct (or
at least the relevant part of it, for backward/forward compatibility)?

> +               return -EFAULT;
> +
> +       return 0;
> +}
> +
>  static int register_kprobe_event(struct trace_kprobe *tk);
>  static int unregister_kprobe_event(struct trace_kprobe *tk);
>
> diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
> index 1ceedb9146b1..5f50386ada59 100644
> --- a/kernel/trace/trace_uprobe.c
> +++ b/kernel/trace/trace_uprobe.c
> @@ -1333,6 +1333,29 @@ static inline void init_trace_event_call(struct trace_uprobe *tu)
>         call->data = tu;
>  }
>
> +int perf_uprobe_event_query(struct perf_event *event, void __user *info)
> +{
> +       struct perf_event_query_probe __user *uquery = info;
> +       struct perf_event_query_probe query = {};
> +       struct trace_event_call *call = event->tp_event;
> +       struct trace_uprobe *tu = (struct trace_uprobe *)call->data;
> +       u64 nmissed, nhit;
> +
> +       if (!capable(CAP_SYS_ADMIN))
> +               return -EPERM;
> +       if (copy_from_user(&query, uquery, sizeof(query)))
> +               return -EFAULT;
> +
> +       nhit = tu->nhit;
> +       nmissed = 0;
> +
> +       if (put_user(nmissed, &uquery->nmissed) ||
> +           put_user(nhit, &uquery->nhit))
> +               return -EFAULT;

Same questions as above.

> +
> +       return 0;
> +}
> +
>  static int register_uprobe_event(struct trace_uprobe *tu)
>  {
>         init_trace_event_call(tu);
> --
> 2.20.1
>