[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <58dd821b-c9b2-ac45-d47a-e5f75aec3d68@gmail.com>
Date: Mon, 13 Jul 2020 08:19:45 -0600
From: David Ahern <dsahern@...il.com>
To: Andrii Nakryiko <andriin@...com>, bpf@...r.kernel.org,
netdev@...r.kernel.org, ast@...com, daniel@...earbox.net
Cc: andrii.nakryiko@...il.com, kernel-team@...com,
Jakub Kicinski <kicinski@...com>, Andrey Ignatov <rdna@...com>,
Takshak Chahande <ctakshak@...com>,
Toke Høiland-Jørgensen
<toke@...hat.com>
Subject: Re: [PATCH bpf-next 2/7] bpf, xdp: add bpf_link-based XDP attachment
API
On 7/10/20 4:49 PM, Andrii Nakryiko wrote:
> Add bpf_link-based API (bpf_xdp_link) to attach BPF XDP program through
> BPF_LINK_CREATE command.
>
> bpf_xdp_link is mutually exclusive with direct BPF program attachment,
> previous BPF program should be detached prior to attempting to create a new
> bpf_xdp_link attachment (for a given XDP mode). Once link is attached, it
> can't be replaced by other BPF program attachment or link attachment. It will
> be detached only when the last BPF link FD is closed.
>
> bpf_xdp_link will be auto-detached when net_device is shutdown, similarly to
> how other BPF links behave (cgroup, flow_dissector). At that point bpf_link
> will become defunct, but won't be destroyed until last FD is closed.
>
> Signed-off-by: Andrii Nakryiko <andriin@...com>
> ---
> include/linux/netdevice.h | 6 +
> include/uapi/linux/bpf.h | 7 +-
> kernel/bpf/syscall.c | 5 +
> net/core/dev.c | 385 ++++++++++++++++++++++++++++----------
That's big diff for 1 patch. A fair bit of is refactoring / code
movement that can be done in a separate refactoring patch making it
cleaer what changes you need for the bpf_link piece.
> 4 files changed, 301 insertions(+), 102 deletions(-)
>
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index d5630e535836..93bcd81d645d 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -886,6 +886,7 @@ struct bpf_prog_offload_ops;
> struct netlink_ext_ack;
> struct xdp_umem;
> struct xdp_dev_bulk_queue;
> +struct bpf_xdp_link;
>
> enum bpf_xdp_mode {
> XDP_MODE_SKB = 0,
> @@ -896,6 +897,7 @@ enum bpf_xdp_mode {
>
> struct bpf_xdp_entity {
> struct bpf_prog *prog;
> + struct bpf_xdp_link *link;
> };
>
> struct netdev_bpf {
> @@ -3824,6 +3826,10 @@ typedef int (*bpf_op_t)(struct net_device *dev, struct netdev_bpf *bpf);
> int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
> int fd, int expected_fd, u32 flags);
> u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode);
> +
> +struct bpf_xdp_link;
already stated above.
> +int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
> +
> int xdp_umem_query(struct net_device *dev, u16 queue_id);
>
> int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 548a749aebb3..41eba148217b 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -227,6 +227,7 @@ enum bpf_attach_type {
> BPF_CGROUP_INET6_GETSOCKNAME,
> BPF_XDP_DEVMAP,
> BPF_CGROUP_INET_SOCK_RELEASE,
> + BPF_XDP,
This really does not add value for the uapi. The link_type uniquely
identifies the type and the expected program type.
> __MAX_BPF_ATTACH_TYPE
> };
>
> @@ -239,6 +240,7 @@ enum bpf_link_type {
> BPF_LINK_TYPE_CGROUP = 3,
> BPF_LINK_TYPE_ITER = 4,
> BPF_LINK_TYPE_NETNS = 5,
> + BPF_LINK_TYPE_XDP = 6,
>
> MAX_BPF_LINK_TYPE,
> };
> @@ -604,7 +606,10 @@ union bpf_attr {
>
> struct { /* struct used by BPF_LINK_CREATE command */
> __u32 prog_fd; /* eBPF program to attach */
> - __u32 target_fd; /* object to attach to */
> + union {
> + __u32 target_fd; /* object to attach to */
> + __u32 target_ifindex; /* target ifindex */
> + };
> __u32 attach_type; /* attach type */
> __u32 flags; /* extra flags */
> } link_create;
> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> index 156f51ffada2..eb4ed4b29418 100644
> --- a/kernel/bpf/syscall.c
> +++ b/kernel/bpf/syscall.c
> @@ -2817,6 +2817,8 @@ attach_type_to_prog_type(enum bpf_attach_type attach_type)
> return BPF_PROG_TYPE_CGROUP_SOCKOPT;
> case BPF_TRACE_ITER:
> return BPF_PROG_TYPE_TRACING;
> + case BPF_XDP:
> + return BPF_PROG_TYPE_XDP;
> default:
> return BPF_PROG_TYPE_UNSPEC;
> }
> @@ -3890,6 +3892,9 @@ static int link_create(union bpf_attr *attr)
> case BPF_PROG_TYPE_FLOW_DISSECTOR:
> ret = netns_bpf_link_create(attr, prog);
> break;
> + case BPF_PROG_TYPE_XDP:
> + ret = bpf_xdp_link_attach(attr, prog);
> + break;
> default:
> ret = -EINVAL;
> }
> diff --git a/net/core/dev.c b/net/core/dev.c
> index d3b82b664e2d..84f755a1ec36 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -8713,8 +8713,47 @@ int dev_change_proto_down_generic(struct net_device *dev, bool proto_down)
> }
> EXPORT_SYMBOL(dev_change_proto_down_generic);
>
> -static struct bpf_prog *dev_xdp_prog(struct net_device *dev, enum bpf_xdp_mode mode)
> +struct bpf_xdp_link {
> + struct bpf_link link;
> + struct net_device *dev; /* protected by rtnl_lock, no refcnt held */
> + int flags;
> +};
> +
> +static enum bpf_xdp_mode dev_xdp_mode(u32 flags)
> +{
> + if (flags & XDP_FLAGS_HW_MODE)
> + return XDP_MODE_HW;
> + if (flags & XDP_FLAGS_DRV_MODE)
> + return XDP_MODE_DRV;
> + return XDP_MODE_SKB;
> +}
> +
> +static bpf_op_t dev_xdp_bpf_op(struct net_device *dev, enum bpf_xdp_mode mode)
> {
> + switch (mode) {
> + case XDP_MODE_SKB:
> + return generic_xdp_install;
> + case XDP_MODE_DRV:
> + case XDP_MODE_HW:
> + return dev->netdev_ops->ndo_bpf;
> + default:
> + return NULL;
> + };
> +}
> +
> +static struct bpf_xdp_link *dev_xdp_link(struct net_device *dev,
> + enum bpf_xdp_mode mode)
> +{
> + return dev->xdp_state[mode].link;
> +}
> +
> +static struct bpf_prog *dev_xdp_prog(struct net_device *dev,
> + enum bpf_xdp_mode mode)
> +{
> + struct bpf_xdp_link *link = dev_xdp_link(dev, mode);
> +
> + if (link)
> + return link->link.prog;
> return dev->xdp_state[mode].prog;
> }
>
> @@ -8725,9 +8764,17 @@ u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode)
> return prog ? prog->aux->id : 0;
> }
>
> +static void dev_xdp_set_link(struct net_device *dev, enum bpf_xdp_mode mode,
> + struct bpf_xdp_link *link)
> +{
> + dev->xdp_state[mode].link = link;
> + dev->xdp_state[mode].prog = NULL;
> +}
> +
> static void dev_xdp_set_prog(struct net_device *dev, enum bpf_xdp_mode mode,
> struct bpf_prog *prog)
> {
> + dev->xdp_state[mode].link = NULL;
> dev->xdp_state[mode].prog = prog;
> }
>
> @@ -8744,6 +8791,14 @@ static int dev_xdp_install(struct net_device *dev, enum bpf_xdp_mode mode,
> xdp.flags = flags;
> xdp.prog = prog;
>
> + /* Drivers assume refcnt is already incremented (i.e, prog pointer is
> + * "moved" into driver), so they don't increment it on their own, but
> + * they do decrement refcnt when program is detached or replaced.
> + * Given net_device also owns link/prog, we need to bump refcnt here
> + * to prevent drivers from underflowing it.
> + */
> + if (prog)
> + bpf_prog_inc(prog);
Why is this refcnt bump not needed today but is needed for your change?
> err = bpf_op(dev, &xdp);
> if (err)
> return err;
and the error path is not decrementing it.
> @@ -8756,39 +8811,221 @@ static int dev_xdp_install(struct net_device *dev, enum bpf_xdp_mode mode,
>
> static void dev_xdp_uninstall(struct net_device *dev)
> {
> - bpf_op_t ndo_bpf;
> + struct bpf_xdp_link *link;
> + struct bpf_prog *prog;
> + enum bpf_xdp_mode mode;
> + bpf_op_t bpf_op;
>
> - /* Remove generic XDP */
> - WARN_ON(dev_xdp_install(dev, XDP_MODE_SKB, generic_xdp_install,
> - NULL, 0, NULL));
> - dev_xdp_set_prog(dev, XDP_MODE_SKB, NULL);
> + ASSERT_RTNL();
>
> - /* Remove from the driver */
> - ndo_bpf = dev->netdev_ops->ndo_bpf;
> - if (!ndo_bpf)
> - return;
> + for (mode = XDP_MODE_SKB; mode < __MAX_XDP_MODE; mode++) {
> + prog = dev_xdp_prog(dev, mode);
> + if (!prog)
> + continue;
> +
> + bpf_op = dev_xdp_bpf_op(dev, mode);
> + WARN_ON(dev_xdp_install(dev, mode, bpf_op, NULL, 0, NULL));
> +
> + /* auto-detach link from net device */
> + link = dev_xdp_link(dev, mode);
> + if (link)
> + link->dev = NULL;
> + else
> + bpf_prog_put(prog);
> +
> + dev_xdp_set_link(dev, mode, NULL);
> + }
> +}
> +
> +static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack,
> + struct bpf_xdp_link *link, struct bpf_prog *new_prog,
> + struct bpf_prog *old_prog, u32 flags)
> +{
> + struct bpf_prog *cur_prog;
> + enum bpf_xdp_mode mode;
> + bpf_op_t bpf_op;
> + int err;
> +
> + ASSERT_RTNL();
>
> - if (dev_xdp_prog_id(dev, XDP_MODE_DRV)) {
> - WARN_ON(dev_xdp_install(dev, XDP_MODE_DRV, ndo_bpf,
> - NULL, 0, NULL));
> - dev_xdp_set_prog(dev, XDP_MODE_DRV, NULL);
> + /* link supports only XDP mode flags */
> + if (link && (flags & ~XDP_FLAGS_MODES))
> + return -EINVAL;
everyone of the -errno returns needs an extack message explaining why
Powered by blists - more mailing lists