[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <20190405214835.GK7431@mini-arch.hsd1.ca.comcast.net>
Date: Fri, 5 Apr 2019 14:48:35 -0700
From: Stanislav Fomichev <sdf@...ichev.me>
To: Martin Lau <kafai@...com>
Cc: Stanislav Fomichev <sdf@...gle.com>,
"netdev@...r.kernel.org" <netdev@...r.kernel.org>,
"bpf@...r.kernel.org" <bpf@...r.kernel.org>,
"davem@...emloft.net" <davem@...emloft.net>,
"ast@...nel.org" <ast@...nel.org>,
"daniel@...earbox.net" <daniel@...earbox.net>
Subject: Re: [PATCH bpf-next 1/3] bpf: support input __sk_buff context in
BPF_PROG_TEST_RUN
On 04/05, Martin Lau wrote:
> On Thu, Apr 04, 2019 at 11:51:31AM -0700, Stanislav Fomichev wrote:
> > Add new set of arguments to bpf_attr for BPF_PROG_TEST_RUN:
> > * ctx_in/ctx_size_in - input context
> > * ctx_out/ctx_size_out - output context
> >
> > The intended use case is to pass some meta data to the test runs that
> > operate on skb (this has being brought up on recent LPC).
> >
> > For programs that use bpf_prog_test_run_skb, support __sk_buff input and
> > output. Initially, from input __sk_buff, copy _only_ cb and priority into
> > skb, all other non-zero fields are prohibited (with EINVAL).
> > If the user has set ctx_out/ctx_size_out, copy the potentially modified
> > __sk_buff back to the userspace.
> >
> > We require all fields of input __sk_buff except the ones we explicitly
> > support to be set to zero. The expectation is that in the future we might
> > add support for more fields and we want to fail explicitly if the user
> > runs the program on the kernel where we don't yet support them.
> >
> > The API is intentionally vague (i.e. we don't explicitly add __sk_buff
> > to bpf_attr, but ctx_in) to potentially let other test_run types use
> > this interface in the future (this can be xdp_md for xdp types for
> > example).
> >
> > Signed-off-by: Stanislav Fomichev <sdf@...gle.com>
> > ---
> > include/uapi/linux/bpf.h | 7 +++
> > kernel/bpf/syscall.c | 2 +-
> > net/bpf/test_run.c | 133 +++++++++++++++++++++++++++++++++++++--
> > 3 files changed, 136 insertions(+), 6 deletions(-)
> >
> > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> > index 837024512baf..8e96f99cebf8 100644
> > --- a/include/uapi/linux/bpf.h
> > +++ b/include/uapi/linux/bpf.h
> > @@ -396,6 +396,13 @@ union bpf_attr {
> > __aligned_u64 data_out;
> > __u32 repeat;
> > __u32 duration;
> > + __u32 ctx_size_in; /* input: len of ctx_in */
> > + __u32 ctx_size_out; /* input/output: len of ctx_out
> > + * returns ENOSPC if ctx_out
> > + * is too small.
> > + */
> > + __aligned_u64 ctx_in;
> > + __aligned_u64 ctx_out;
> > } test;
> >
> > struct { /* anonymous struct used by BPF_*_GET_*_ID */
> > diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> > index 1d65e56594db..4ad754e2943a 100644
> > --- a/kernel/bpf/syscall.c
> > +++ b/kernel/bpf/syscall.c
> > @@ -1949,7 +1949,7 @@ static int bpf_prog_query(const union bpf_attr *attr,
> > return cgroup_bpf_prog_query(attr, uattr);
> > }
> >
> > -#define BPF_PROG_TEST_RUN_LAST_FIELD test.duration
> > +#define BPF_PROG_TEST_RUN_LAST_FIELD test.ctx_out
> >
> > static int bpf_prog_test_run(const union bpf_attr *attr,
> > union bpf_attr __user *uattr)
> > diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
> > index fab142b796ef..593912a66d64 100644
> > --- a/net/bpf/test_run.c
> > +++ b/net/bpf/test_run.c
> > @@ -123,12 +123,121 @@ static void *bpf_test_init(const union bpf_attr *kattr, u32 size,
> > return data;
> > }
> >
> > +static void *bpf_ctx_init(const union bpf_attr *kattr, u32 max_size)
> > +{
> > + void __user *data_in = u64_to_user_ptr(kattr->test.ctx_in);
> > + u32 size = kattr->test.ctx_size_in;
> > + void *data;
> > +
> > + if ((kattr->test.ctx_size_in && !kattr->test.ctx_in) ||
> > + (!kattr->test.ctx_size_in && kattr->test.ctx_in))
> > + return ERR_PTR(-EINVAL);
> > +
> > + if ((kattr->test.ctx_size_out && !kattr->test.ctx_out) ||
> > + (!kattr->test.ctx_size_out && kattr->test.ctx_out))
> > + return ERR_PTR(-EINVAL);
> These attr checks belong to bpf_prog_test_run() in syscall.c.
Ack, will move.
> > +
> > + if (!size)
> > + return NULL;
> > +
> > + if (size > max_size)
> Not allowed even the tailing (size - max_size) bytes are zero?
We can do that if you think it makes sense. My intention was
not to unnecessarily complicate it because it's for testing only.
> > + return ERR_PTR(-EINVAL);
> The convention should be -E2BIG if the userspace passed in something bigger
> with tailing non-zero that the kernel cannot understand.
SGTM.
> > +
> > + data = kzalloc(max_size, GFP_USER);
> > + if (!data)
> > + return ERR_PTR(-ENOMEM);
> > +
> > + if (copy_from_user(data, data_in, size)) {
> > + kfree(data);
> > + return ERR_PTR(-EFAULT);
> > + }
> > + return data;
> > +}
> > +
> > +static int bpf_ctx_finish(const union bpf_attr *kattr,
> > + union bpf_attr __user *uattr, const void *data,
> > + u32 size)
> > +{
> > + void __user *data_out = u64_to_user_ptr(kattr->test.ctx_out);
> > + u32 copy_size = size;
> Is the "u32 copy_size = size;" needed?
> I don't see "size" is changed.
>
> > +
> > + if (!kattr->test.ctx_size_out)
> > + return 0;
> > +
> > + if (copy_size > kattr->test.ctx_size_out)
> If I read both bpf_ctx_init() and btf_ctx_finish() correctly,
> the kattr->test.ctx_size_in (input) can be smaller than
> sizeof(struct __sk_buff) but the kattr->test.ctx_size_out (output)
> cannot be smaller than sizeof(struct __sk_buff)?
Yeah, you read that correct. Let me actually do the same as we do in
bpf_test_finish - clamp to the user's requested size (and return
-ENOSPC). That will address you comment about copy_size as well.
> > + return -ENOSPC;
> > +
> > + if (data_out && copy_to_user(data_out, data, copy_size))
> "data_out" check is not needed.
Agreed.
>
> > + return -EFAULT;
> > + if (copy_to_user(&uattr->test.ctx_size_out, &size, sizeof(size)))
> > + return -EFAULT;
> > +
> > + return 0;
> > +}
> > +
> > +/**
> > + * range_is_zero - test whether buffer is initialized
> > + * @buf: buffer to check
> > + * @from: check from this position
> > + * @to: check up until (excluding) this position
> > + *
> > + * This function returns true if the there is a non-zero byte
> > + * in the buf in the range [from,to).
> > + */
> > +static inline bool range_is_zero(void *buf, size_t from, size_t to)
> > +{
> > + return !memchr_inv((u8 *)buf + from, 0, to - from);
> > +}
> > +
> > +static int convert___skb_to_skb(struct sk_buff *skb, struct __sk_buff *__skb)
> > +{
> > + struct qdisc_skb_cb *cb = (struct qdisc_skb_cb *)skb->cb;
> > +
> > + if (!__skb)
> > + return 0;
> > +
> > + /* make sure the fields we don't use are zeroed */
> > + if (!range_is_zero(__skb, 0, offsetof(struct __sk_buff, priority)))
> > + return -EINVAL;
> > +
> > + /* priority is allowed */
> > +
> > + if (!range_is_zero(__skb, offsetof(struct __sk_buff, priority) +
> > + FIELD_SIZEOF(struct __sk_buff, priority),
> > + offsetof(struct __sk_buff, cb)))
> > + return -EINVAL;
> > +
> > + /* cb is allowed */
> > +
> > + if (!range_is_zero(__skb, offsetof(struct __sk_buff, cb) +
> > + FIELD_SIZEOF(struct __sk_buff, cb),
> > + sizeof(struct __sk_buff)))
> > + return -EINVAL;
> > +
> > + skb->priority = __skb->priority;
> > + memcpy(&cb->data, __skb->cb, QDISC_CB_PRIV_LEN);
> > +
> > + return 0;
> > +}
> > +
> > +static void convert_skb_to___skb(struct sk_buff *skb, struct __sk_buff *__skb)
> > +{
> > + struct qdisc_skb_cb *cb = (struct qdisc_skb_cb *)skb->cb;
> > +
> > + if (!__skb)
> > + return;
> > +
> > + __skb->priority = skb->priority;
> > + memcpy(__skb->cb, &cb->data, QDISC_CB_PRIV_LEN);
> > +}
> > +
> > int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
> > union bpf_attr __user *uattr)
> > {
> > bool is_l2 = false, is_direct_pkt_access = false;
> > u32 size = kattr->test.data_size_in;
> > u32 repeat = kattr->test.repeat;
> > + struct __sk_buff *ctx = NULL;
> > u32 retval, duration;
> > int hh_len = ETH_HLEN;
> > struct sk_buff *skb;
> > @@ -141,6 +250,12 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
> > if (IS_ERR(data))
> > return PTR_ERR(data);
> >
> > + ctx = bpf_ctx_init(kattr, sizeof(struct __sk_buff));
> > + if (IS_ERR(ctx)) {
> > + kfree(data);
> > + return PTR_ERR(ctx);
> > + }
> > +
> > switch (prog->type) {
> > case BPF_PROG_TYPE_SCHED_CLS:
> > case BPF_PROG_TYPE_SCHED_ACT:
> > @@ -158,6 +273,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
> > sk = kzalloc(sizeof(struct sock), GFP_USER);
> > if (!sk) {
> > kfree(data);
> > + kfree(ctx);
> > return -ENOMEM;
> > }
> > sock_net_set(sk, current->nsproxy->net_ns);
> > @@ -166,6 +282,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
> > skb = build_skb(data, 0);
> > if (!skb) {
> > kfree(data);
> > + kfree(ctx);
> > kfree(sk);
> > return -ENOMEM;
> > }
> > @@ -180,12 +297,12 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
> > __skb_push(skb, hh_len);
> > if (is_direct_pkt_access)
> > bpf_compute_data_pointers(skb);
> > + ret = convert___skb_to_skb(skb, ctx);
> > + if (ret)
> > + goto out;
> > ret = bpf_test_run(prog, skb, repeat, &retval, &duration);
> > - if (ret) {
> > - kfree_skb(skb);
> > - kfree(sk);
> > - return ret;
> > - }
> > + if (ret)
> > + goto out;
> > if (!is_l2) {
> > if (skb_headroom(skb) < hh_len) {
> > int nhead = HH_DATA_ALIGN(hh_len - skb_headroom(skb));
> I think the following needs kfree(ctx) also (or goto out).
> Copying the context here:
>
> if (pskb_expand_head(skb, nhead, 0, GFP_USER)) {
> kfree_skb(skb);
> kfree(sk);
> return -ENOMEM;
> }
>
Ah, good catch, not sure how I missed that :-(
Thank you for a review, will follow up with a v2!
> > @@ -198,14 +315,20 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
> > }
> > memset(__skb_push(skb, hh_len), 0, hh_len);
> > }
> > + convert_skb_to___skb(skb, ctx);
> >
> > size = skb->len;
> > /* bpf program can never convert linear skb to non-linear */
> > if (WARN_ON_ONCE(skb_is_nonlinear(skb)))
> > size = skb_headlen(skb);
> > ret = bpf_test_finish(kattr, uattr, skb->data, size, retval, duration);
> > + if (!ret)
> > + ret = bpf_ctx_finish(kattr, uattr, ctx,
> > + sizeof(struct __sk_buff));
> > +out:
> > kfree_skb(skb);
> > kfree(sk);
> > + kfree(ctx);
> > return ret;
> > }
> >
> > --
> > 2.21.0.392.gf8f6787159e-goog
> >
Powered by blists - more mailing lists