netdev - Re: [PATCH net-next v3 01/15] bpf: BPF support for sock

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <B03E3B5A-50E2-4291-B091-23927AF6846F@fb.com>
Date:   Thu, 22 Jun 2017 22:58:52 +0000
From:   Lawrence Brakmo <brakmo@...com>
To:     Daniel Borkmann <daniel@...earbox.net>,
        netdev <netdev@...r.kernel.org>
CC:     Kernel Team <Kernel-team@...com>, Blake Matheny <bmatheny@...com>,
        "Alexei Starovoitov" <ast@...com>,
        David Ahern <dsa@...ulusnetworks.com>
Subject: Re: [PATCH net-next v3 01/15] bpf: BPF support for sock_ops


On 6/22/17, 3:41 PM, "Daniel Borkmann" <daniel@...earbox.net> wrote:

    On 06/20/2017 05:00 AM, Lawrence Brakmo wrote:
    [...]
    > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
    > index f94b48b..861dbe9 100644
    > --- a/include/uapi/linux/bpf.h
    > +++ b/include/uapi/linux/bpf.h
    > @@ -120,12 +120,14 @@ enum bpf_prog_type {
    >   	BPF_PROG_TYPE_LWT_IN,
    >   	BPF_PROG_TYPE_LWT_OUT,
    >   	BPF_PROG_TYPE_LWT_XMIT,
    > +	BPF_PROG_TYPE_SOCK_OPS,
    >   };
    >
    >   enum bpf_attach_type {
    >   	BPF_CGROUP_INET_INGRESS,
    >   	BPF_CGROUP_INET_EGRESS,
    >   	BPF_CGROUP_INET_SOCK_CREATE,
    > +	BPF_GLOBAL_SOCK_OPS,
    >   	__MAX_BPF_ATTACH_TYPE
    >   };
    [...]
    >   #endif /* _UAPI__LINUX_BPF_H__ */
    > diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
    > index 8942c82..e02831f 100644
    > --- a/kernel/bpf/syscall.c
    > +++ b/kernel/bpf/syscall.c
    [...]
    > +static int bpf_prog_attach(const union bpf_attr *attr)
    > +{
    > +	if (!capable(CAP_NET_ADMIN))
    > +		return -EPERM;
    > +
    > +	if (CHECK_ATTR(BPF_PROG_ATTACH))
    > +		return -EINVAL;
    > +
    > +	if (attr->attach_type == BPF_GLOBAL_SOCK_OPS)
    > +		return bpf_sock_ops_attach_global_prog(attr->attach_bpf_fd);
    > +	else
    > +		return bpf_prog_attach_cgroup(attr);
    > +}
    > +
    [...]
    > +static int bpf_prog_detach(const union bpf_attr *attr)
    > +{
    > +	if (!capable(CAP_NET_ADMIN))
    > +		return -EPERM;
    > +
    > +	if (CHECK_ATTR(BPF_PROG_DETACH))
    > +		return -EINVAL;
    > +
    > +	if (attr->attach_type == BPF_GLOBAL_SOCK_OPS)
    > +		return bpf_sock_ops_detach_global_prog();
    > +	else
    > +		return bpf_prog_detach_cgroup(attr);
    > +}
    >
    >   #define BPF_PROG_TEST_RUN_LAST_FIELD test.duration
    >
    > @@ -1431,14 +1467,12 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
    >   	case BPF_OBJ_GET:
    >   		err = bpf_obj_get(&attr);
    >   		break;
    > -#ifdef CONFIG_CGROUP_BPF
    >   	case BPF_PROG_ATTACH:
    >   		err = bpf_prog_attach(&attr);
    >   		break;
    >   	case BPF_PROG_DETACH:
    >   		err = bpf_prog_detach(&attr);
    >   		break;
    > -#endif
    >   	case BPF_PROG_TEST_RUN:
    >   		err = bpf_prog_test_run(&attr, uattr);
    >   		break;
    [...]
    > diff --git a/net/core/sock_bpfops.c b/net/core/sock_bpfops.c
    > new file mode 100644
    > index 0000000..06f4a64
    > --- /dev/null
    > +++ b/net/core/sock_bpfops.c
    > @@ -0,0 +1,65 @@
    > +/*
    > + * BPF support for sockets
    > + *
    > + * Copyright (c) 2016 Lawrence Brakmo <brakmo@...com>
    > + *
    > + * This program is free software; you can redistribute it and/or modify
    > + * it under the terms of the GNU General Public License version 2
    > + * as published by the Free Software Foundation.
    > + */
    > +
    > +#include <net/sock.h>
    > +#include <linux/skbuff.h>
    > +#include <linux/bpf.h>
    > +#include <linux/filter.h>
    > +#include <linux/errno.h>
    > +#ifdef CONFIG_NET_NS
    > +#include <net/net_namespace.h>
    > +#include <linux/proc_ns.h>
    > +#endif
    > +
    > +/* Global BPF program for sockets */
    > +static struct bpf_prog *bpf_global_sock_ops_prog;
    > +
    > +int bpf_sock_ops_detach_global_prog(void)
    > +{
    > +	struct bpf_prog *old_prog;
    > +
    > +	old_prog = xchg(&bpf_global_sock_ops_prog, NULL);
    > +
    > +	if (old_prog)
    > +		bpf_prog_put(old_prog);
    > +
    > +	return 0;
    > +}
    > +
    > +int bpf_sock_ops_attach_global_prog(int fd)
    > +{
    > +	struct bpf_prog *prog, *old_prog;
    > +	int err = 0;
    > +
    > +	prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_SOCK_OPS);
    > +	if (IS_ERR(prog))
    > +		return PTR_ERR(prog);
    > +
    > +	old_prog = xchg(&bpf_global_sock_ops_prog, prog);
    > +	if (old_prog)
    > +		bpf_prog_put(old_prog);
    > +	return err;
    > +}
    > +
    > +int bpf_sock_ops_call(struct bpf_sock_ops_kern *bpf_sock)
    > +{
    > +	struct bpf_prog *prog;
    > +	int ret;
    > +
    > +	rcu_read_lock();
    > +	prog =  READ_ONCE(bpf_global_sock_ops_prog);
    > +	if (prog)
    > +		ret = BPF_PROG_RUN(prog, bpf_sock);
    > +	else
    > +		ret = -1;
    > +	rcu_read_unlock();
    > +
    > +	return ret;
    > +}
    
    Now that we integrate with BPF_PROG_ATTACH/DETACH, can you make all
    the above also per cgroup as we have with all other BPF_CGROUP_INET_*
    progs? It seems kind of weird that we have one single global program
    doing the enforcement of TCP and congctl options. Something on a more
    fine-grained level like cgroups would be more suited wrt containers,
    etc. Right now there's no notion of global program of such kind.
    
    Thanks,
    Daniel


Daniel, I see value for having a global program, so I would like to keep that. When
this patchset is accepted, I will submit one that adds support for per cgroup
sock_ops programs, with the option to use the global one if none is
specified for a cgroup. We could also have the option of the cgroup sock_ops
program choosing if the global program should run for a particular op based on
its return value. We can iron it out the details when that patch is submitted.

Is it acceptable?

Thanks,
Lawrence