[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <B03E3B5A-50E2-4291-B091-23927AF6846F@fb.com>
Date: Thu, 22 Jun 2017 22:58:52 +0000
From: Lawrence Brakmo <brakmo@...com>
To: Daniel Borkmann <daniel@...earbox.net>,
netdev <netdev@...r.kernel.org>
CC: Kernel Team <Kernel-team@...com>, Blake Matheny <bmatheny@...com>,
"Alexei Starovoitov" <ast@...com>,
David Ahern <dsa@...ulusnetworks.com>
Subject: Re: [PATCH net-next v3 01/15] bpf: BPF support for sock_ops
On 6/22/17, 3:41 PM, "Daniel Borkmann" <daniel@...earbox.net> wrote:
On 06/20/2017 05:00 AM, Lawrence Brakmo wrote:
[...]
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index f94b48b..861dbe9 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -120,12 +120,14 @@ enum bpf_prog_type {
> BPF_PROG_TYPE_LWT_IN,
> BPF_PROG_TYPE_LWT_OUT,
> BPF_PROG_TYPE_LWT_XMIT,
> + BPF_PROG_TYPE_SOCK_OPS,
> };
>
> enum bpf_attach_type {
> BPF_CGROUP_INET_INGRESS,
> BPF_CGROUP_INET_EGRESS,
> BPF_CGROUP_INET_SOCK_CREATE,
> + BPF_GLOBAL_SOCK_OPS,
> __MAX_BPF_ATTACH_TYPE
> };
[...]
> #endif /* _UAPI__LINUX_BPF_H__ */
> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> index 8942c82..e02831f 100644
> --- a/kernel/bpf/syscall.c
> +++ b/kernel/bpf/syscall.c
[...]
> +static int bpf_prog_attach(const union bpf_attr *attr)
> +{
> + if (!capable(CAP_NET_ADMIN))
> + return -EPERM;
> +
> + if (CHECK_ATTR(BPF_PROG_ATTACH))
> + return -EINVAL;
> +
> + if (attr->attach_type == BPF_GLOBAL_SOCK_OPS)
> + return bpf_sock_ops_attach_global_prog(attr->attach_bpf_fd);
> + else
> + return bpf_prog_attach_cgroup(attr);
> +}
> +
[...]
> +static int bpf_prog_detach(const union bpf_attr *attr)
> +{
> + if (!capable(CAP_NET_ADMIN))
> + return -EPERM;
> +
> + if (CHECK_ATTR(BPF_PROG_DETACH))
> + return -EINVAL;
> +
> + if (attr->attach_type == BPF_GLOBAL_SOCK_OPS)
> + return bpf_sock_ops_detach_global_prog();
> + else
> + return bpf_prog_detach_cgroup(attr);
> +}
>
> #define BPF_PROG_TEST_RUN_LAST_FIELD test.duration
>
> @@ -1431,14 +1467,12 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
> case BPF_OBJ_GET:
> err = bpf_obj_get(&attr);
> break;
> -#ifdef CONFIG_CGROUP_BPF
> case BPF_PROG_ATTACH:
> err = bpf_prog_attach(&attr);
> break;
> case BPF_PROG_DETACH:
> err = bpf_prog_detach(&attr);
> break;
> -#endif
> case BPF_PROG_TEST_RUN:
> err = bpf_prog_test_run(&attr, uattr);
> break;
[...]
> diff --git a/net/core/sock_bpfops.c b/net/core/sock_bpfops.c
> new file mode 100644
> index 0000000..06f4a64
> --- /dev/null
> +++ b/net/core/sock_bpfops.c
> @@ -0,0 +1,65 @@
> +/*
> + * BPF support for sockets
> + *
> + * Copyright (c) 2016 Lawrence Brakmo <brakmo@...com>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2
> + * as published by the Free Software Foundation.
> + */
> +
> +#include <net/sock.h>
> +#include <linux/skbuff.h>
> +#include <linux/bpf.h>
> +#include <linux/filter.h>
> +#include <linux/errno.h>
> +#ifdef CONFIG_NET_NS
> +#include <net/net_namespace.h>
> +#include <linux/proc_ns.h>
> +#endif
> +
> +/* Global BPF program for sockets */
> +static struct bpf_prog *bpf_global_sock_ops_prog;
> +
> +int bpf_sock_ops_detach_global_prog(void)
> +{
> + struct bpf_prog *old_prog;
> +
> + old_prog = xchg(&bpf_global_sock_ops_prog, NULL);
> +
> + if (old_prog)
> + bpf_prog_put(old_prog);
> +
> + return 0;
> +}
> +
> +int bpf_sock_ops_attach_global_prog(int fd)
> +{
> + struct bpf_prog *prog, *old_prog;
> + int err = 0;
> +
> + prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_SOCK_OPS);
> + if (IS_ERR(prog))
> + return PTR_ERR(prog);
> +
> + old_prog = xchg(&bpf_global_sock_ops_prog, prog);
> + if (old_prog)
> + bpf_prog_put(old_prog);
> + return err;
> +}
> +
> +int bpf_sock_ops_call(struct bpf_sock_ops_kern *bpf_sock)
> +{
> + struct bpf_prog *prog;
> + int ret;
> +
> + rcu_read_lock();
> + prog = READ_ONCE(bpf_global_sock_ops_prog);
> + if (prog)
> + ret = BPF_PROG_RUN(prog, bpf_sock);
> + else
> + ret = -1;
> + rcu_read_unlock();
> +
> + return ret;
> +}
Now that we integrate with BPF_PROG_ATTACH/DETACH, can you make all
the above also per cgroup as we have with all other BPF_CGROUP_INET_*
progs? It seems kind of weird that we have one single global program
doing the enforcement of TCP and congctl options. Something on a more
fine-grained level like cgroups would be more suited wrt containers,
etc. Right now there's no notion of global program of such kind.
Thanks,
Daniel
Daniel, I see value for having a global program, so I would like to keep that. When
this patchset is accepted, I will submit one that adds support for per cgroup
sock_ops programs, with the option to use the global one if none is
specified for a cgroup. We could also have the option of the cgroup sock_ops
program choosing if the global program should run for a particular op based on
its return value. We can iron it out the details when that patch is submitted.
Is it acceptable?
Thanks,
Lawrence
Powered by blists - more mailing lists