[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAEf4BzYEOzfmwi8n8K_W_6Pc+gC081ncmRCAq8Fz0vr=y7eMcg@mail.gmail.com>
Date: Mon, 23 Aug 2021 21:50:40 -0700
From: Andrii Nakryiko <andrii.nakryiko@...il.com>
To: Dave Marchevsky <davemarchevsky@...com>
Cc: bpf <bpf@...r.kernel.org>, Alexei Starovoitov <ast@...nel.org>,
Daniel Borkmann <daniel@...earbox.net>,
Andrii Nakryiko <andrii@...nel.org>,
Yonghong Song <yhs@...com>,
Florent Revest <revest@...omium.org>,
Networking <netdev@...r.kernel.org>,
"open list:KERNEL SELFTEST FRAMEWORK"
<linux-kselftest@...r.kernel.org>
Subject: Re: [PATCH bpf-next 2/5] bpf: add bpf_trace_vprintk helper
On Fri, Aug 20, 2021 at 7:59 PM Dave Marchevsky <davemarchevsky@...com> wrote:
>
> This helper is meant to be "bpf_trace_printk, but with proper vararg
We have bpf_snprintf() and bpf_seq_printf() names for other BPF
helpers using the same approach. How about we call this one simply
`bpf_printf`? It will be in line with other naming, and it is the logical
BPF equivalent of user-space printf (which outputs to stdout, which in BPF
land is /sys/kernel/debug/tracing/trace_pipe). And it will be logical
to have a nice and short BPF_PRINTF() convenience macro provided by
libbpf.
> support". Follow bpf_snprintf's example and take a u64 pseudo-vararg
> array. Write to dmesg using the same mechanism as bpf_trace_printk.
Are you sure about the dmesg part?... bpf_trace_printk is outputting
into /sys/kernel/debug/tracing/trace_pipe.
>
> Signed-off-by: Dave Marchevsky <davemarchevsky@...com>
> ---
> include/linux/bpf.h | 1 +
> include/uapi/linux/bpf.h | 23 +++++++++++++++
> kernel/bpf/core.c | 5 ++++
> kernel/bpf/helpers.c | 2 ++
> kernel/trace/bpf_trace.c | 52 +++++++++++++++++++++++++++++++++-
> tools/include/uapi/linux/bpf.h | 23 +++++++++++++++
> 6 files changed, 105 insertions(+), 1 deletion(-)
>
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index be8d57e6e78a..b6c45a6cbbba 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -1088,6 +1088,7 @@ bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *f
> int bpf_prog_calc_tag(struct bpf_prog *fp);
>
> const struct bpf_func_proto *bpf_get_trace_printk_proto(void);
> +const struct bpf_func_proto *bpf_get_trace_vprintk_proto(void);
>
> typedef unsigned long (*bpf_ctx_copy_t)(void *dst, const void *src,
> unsigned long off, unsigned long len);
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index c4f7892edb2b..899a2649d986 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -4871,6 +4871,28 @@ union bpf_attr {
> * Return
> * Value specified by user at BPF link creation/attachment time
> * or 0, if it was not specified.
> + *
> + * u64 bpf_trace_vprintk(const char *fmt, u32 fmt_size, const void *data, u32 data_len)
> + * Description
> + * Behaves like **bpf_trace_printk**\ () helper, but takes an array of u64
> + * to format. Supports up to 12 arguments to print in this way.
We didn't specify 12 in the description of bpf_snprintf() or
bpf_seq_printf(), so why start doing that here? For the data/args format,
let's just refer to bpf_snprintf() or bpf_seq_printf(), whichever does
a better job of explaining this :)
> + * The *fmt* and *fmt_size* are for the format string itself. The *data* and
> + * *data_len* are format string arguments.
> + *
> + * Each format specifier in **fmt** corresponds to one u64 element
> + * in the **data** array. For strings and pointers where pointees
> + * are accessed, only the pointer values are stored in the *data*
> + * array. The *data_len* is the size of *data* in bytes.
> + * Formats **%s**, **%p{i,I}{4,6}** requires to read kernel memory.
> + * Reading kernel memory may fail due to either invalid address or
> + * valid address but requiring a major memory fault. If reading kernel memory
> + * fails, the string for **%s** will be an empty string, and the ip
> + * address for **%p{i,I}{4,6}** will be 0. Not returning error to
> + * bpf program is consistent with what **bpf_trace_printk**\ () does for now.
This is just a copy/paste from other helpers. Let's avoid duplication
and just point people to a description in other helpers.
> + *
> + * Return
> + * The number of bytes written to the buffer, or a negative error
> + * in case of failure.
> */
> #define __BPF_FUNC_MAPPER(FN) \
> FN(unspec), \
> @@ -5048,6 +5070,7 @@ union bpf_attr {
> FN(timer_cancel), \
> FN(get_func_ip), \
> FN(get_attach_cookie), \
> + FN(trace_vprintk), \
> /* */
>
> /* integer value in 'imm' field of BPF_CALL instruction selects which helper
> diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
> index 91f24c7b38a1..a137c550046c 100644
> --- a/kernel/bpf/core.c
> +++ b/kernel/bpf/core.c
> @@ -2357,6 +2357,11 @@ const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void)
> return NULL;
> }
>
> +const struct bpf_func_proto * __weak bpf_get_trace_vprintk_proto(void)
> +{
> + return NULL;
> +}
> +
> u64 __weak
> bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
> void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy)
> diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> index 5ce19b376ef7..863e5ee68558 100644
> --- a/kernel/bpf/helpers.c
> +++ b/kernel/bpf/helpers.c
> @@ -1419,6 +1419,8 @@ bpf_base_func_proto(enum bpf_func_id func_id)
> return &bpf_snprintf_btf_proto;
> case BPF_FUNC_snprintf:
> return &bpf_snprintf_proto;
> + case BPF_FUNC_trace_vprintk:
> + return bpf_get_trace_vprintk_proto();
> default:
> return NULL;
> }
> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> index 2cf4bfa1ab7b..8b3f1ec9e082 100644
> --- a/kernel/trace/bpf_trace.c
> +++ b/kernel/trace/bpf_trace.c
> @@ -398,7 +398,7 @@ static const struct bpf_func_proto bpf_trace_printk_proto = {
> .arg2_type = ARG_CONST_SIZE,
> };
>
> -const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
> +static __always_inline void __set_printk_clr_event(void)
Please drop __always_inline; we only use __always_inline for
absolutely performance-critical routines. Let the compiler decide.
> {
> /*
> * This program might be calling bpf_trace_printk,
> @@ -410,10 +410,58 @@ const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
> */
> if (trace_set_clr_event("bpf_trace", "bpf_trace_printk", 1))
> pr_warn_ratelimited("could not enable bpf_trace_printk events");
> +}
>
> +const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
> +{
> + __set_printk_clr_event();
> return &bpf_trace_printk_proto;
> }
>
> +BPF_CALL_4(bpf_trace_vprintk, char *, fmt, u32, fmt_size, const void *, data,
> + u32, data_len)
> +{
> + static char buf[BPF_TRACE_PRINTK_SIZE];
> + unsigned long flags;
> + int ret, num_args;
> + u32 *bin_args;
> +
> + if (data_len & 7 || data_len > MAX_BPRINTF_VARARGS * 8 ||
> + (data_len && !data))
> + return -EINVAL;
> + num_args = data_len / 8;
> +
> + ret = bpf_bprintf_prepare(fmt, fmt_size, data, &bin_args, num_args);
> + if (ret < 0)
> + return ret;
> +
> + raw_spin_lock_irqsave(&trace_printk_lock, flags);
> + ret = bstr_printf(buf, sizeof(buf), fmt, bin_args);
> +
> + trace_bpf_trace_printk(buf);
> + raw_spin_unlock_irqrestore(&trace_printk_lock, flags);
> +
> + bpf_bprintf_cleanup();
> +
> + return ret;
> +}
> +
> +static const struct bpf_func_proto bpf_trace_vprintk_proto = {
> + .func = bpf_trace_vprintk,
> + .gpl_only = true,
> + .ret_type = RET_INTEGER,
> + .arg1_type = ARG_PTR_TO_MEM,
> + .arg2_type = ARG_CONST_SIZE,
> + .arg3_type = ARG_PTR_TO_MEM_OR_NULL,
> + .arg4_type = ARG_CONST_SIZE_OR_ZERO,
> +};
> +
> +const struct bpf_func_proto *bpf_get_trace_vprintk_proto(void)
> +{
> + __set_printk_clr_event();
> + return &bpf_trace_vprintk_proto;
> +}
> +
> BPF_CALL_5(bpf_seq_printf, struct seq_file *, m, char *, fmt, u32, fmt_size,
> const void *, data, u32, data_len)
> {
> @@ -1113,6 +1161,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
> return &bpf_snprintf_proto;
> case BPF_FUNC_get_func_ip:
> return &bpf_get_func_ip_proto_tracing;
> + case BPF_FUNC_trace_vprintk:
> + return bpf_get_trace_vprintk_proto();
> default:
> return bpf_base_func_proto(func_id);
> }
> diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
> index c4f7892edb2b..899a2649d986 100644
> --- a/tools/include/uapi/linux/bpf.h
> +++ b/tools/include/uapi/linux/bpf.h
> @@ -4871,6 +4871,28 @@ union bpf_attr {
> * Return
> * Value specified by user at BPF link creation/attachment time
> * or 0, if it was not specified.
> + *
> + * u64 bpf_trace_vprintk(const char *fmt, u32 fmt_size, const void *data, u32 data_len)
> + * Description
> + * Behaves like **bpf_trace_printk**\ () helper, but takes an array of u64
> + * to format. Supports up to 12 arguments to print in this way.
> + * The *fmt* and *fmt_size* are for the format string itself. The *data* and
> + * *data_len* are format string arguments.
> + *
> + * Each format specifier in **fmt** corresponds to one u64 element
> + * in the **data** array. For strings and pointers where pointees
> + * are accessed, only the pointer values are stored in the *data*
> + * array. The *data_len* is the size of *data* in bytes.
> + * Formats **%s**, **%p{i,I}{4,6}** requires to read kernel memory.
> + * Reading kernel memory may fail due to either invalid address or
> + * valid address but requiring a major memory fault. If reading kernel memory
> + * fails, the string for **%s** will be an empty string, and the ip
> + * address for **%p{i,I}{4,6}** will be 0. Not returning error to
> + * bpf program is consistent with what **bpf_trace_printk**\ () does for now.
> + *
> + * Return
> + * The number of bytes written to the buffer, or a negative error
> + * in case of failure.
> */
> #define __BPF_FUNC_MAPPER(FN) \
> FN(unspec), \
> @@ -5048,6 +5070,7 @@ union bpf_attr {
> FN(timer_cancel), \
> FN(get_func_ip), \
> FN(get_attach_cookie), \
> + FN(trace_vprintk), \
> /* */
>
> /* integer value in 'imm' field of BPF_CALL instruction selects which helper
> --
> 2.30.2
>
Powered by blists - more mailing lists