[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <5A472047-F329-43C3-9DBC-9BCFC0A19F1C@fb.com>
Date: Wed, 26 Jun 2019 15:17:47 +0000
From: Song Liu <songliubraving@...com>
To: Daniel Borkmann <daniel@...earbox.net>
CC: Networking <netdev@...r.kernel.org>, bpf <bpf@...r.kernel.org>,
"Alexei Starovoitov" <ast@...nel.org>,
Kernel Team <Kernel-team@...com>,
"jannh@...gle.com" <jannh@...gle.com>
Subject: Re: [PATCH bpf-next 1/4] bpf: unprivileged BPF access via /dev/bpf
> On Jun 26, 2019, at 6:32 AM, Daniel Borkmann <daniel@...earbox.net> wrote:
>
> On 06/25/2019 08:23 PM, Song Liu wrote:
>> This patch introduce unprivileged BPF access. The access control is
>> achieved via device /dev/bpf. Users with access to /dev/bpf are able
>> to access BPF syscall.
>>
>> Two ioctl command are added to /dev/bpf:
>>
>> The first two commands get/put permission to access sys_bpf. This
>> permission is noted by setting bit TASK_BPF_FLAG_PERMITTED of
>> current->bpf_flags. This permission cannot be inherited via fork().
>>
>> Helper function bpf_capable() is added to check whether the task has got
>> permission via /dev/bpf.
>>
>> Signed-off-by: Song Liu <songliubraving@...com>
>
> [ Lets Cc Jann so he has a chance to review as he was the one who suggested
> the idea. ]
>
>> ---
>> Documentation/ioctl/ioctl-number.txt | 1 +
>> include/linux/bpf.h | 12 +++++
>> include/linux/sched.h | 8 ++++
>> include/uapi/linux/bpf.h | 5 ++
>> kernel/bpf/arraymap.c | 2 +-
>> kernel/bpf/cgroup.c | 2 +-
>> kernel/bpf/core.c | 4 +-
>> kernel/bpf/cpumap.c | 2 +-
>> kernel/bpf/devmap.c | 2 +-
>> kernel/bpf/hashtab.c | 4 +-
>> kernel/bpf/lpm_trie.c | 2 +-
>> kernel/bpf/offload.c | 2 +-
>> kernel/bpf/queue_stack_maps.c | 2 +-
>> kernel/bpf/reuseport_array.c | 2 +-
>> kernel/bpf/stackmap.c | 2 +-
>> kernel/bpf/syscall.c | 72 +++++++++++++++++++++-------
>> kernel/bpf/verifier.c | 2 +-
>> kernel/bpf/xskmap.c | 2 +-
>> kernel/fork.c | 4 ++
>> net/core/filter.c | 6 +--
>> 20 files changed, 104 insertions(+), 34 deletions(-)
>>
>> diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt
>> index c9558146ac58..19998b99d603 100644
>> --- a/Documentation/ioctl/ioctl-number.txt
>> +++ b/Documentation/ioctl/ioctl-number.txt
>> @@ -327,6 +327,7 @@ Code Seq#(hex) Include File Comments
>> 0xB4 00-0F linux/gpio.h <mailto:linux-gpio@...r.kernel.org>
>> 0xB5 00-0F uapi/linux/rpmsg.h <mailto:linux-remoteproc@...r.kernel.org>
>> 0xB6 all linux/fpga-dfl.h
>> +0xBP 01-02 uapi/linux/bpf.h <mailto:bpf@...r.kernel.org>
>> 0xC0 00-0F linux/usb/iowarrior.h
>> 0xCA 00-0F uapi/misc/cxl.h
>> 0xCA 10-2F uapi/misc/ocxl.h
>> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
>> index a62e7889b0b6..dbba7870f6df 100644
>> --- a/include/linux/bpf.h
>> +++ b/include/linux/bpf.h
>> @@ -14,6 +14,10 @@
>> #include <linux/numa.h>
>> #include <linux/wait.h>
>> #include <linux/u64_stats_sync.h>
>> +#include <linux/sched.h>
>> +#include <linux/capability.h>
>> +
>> +#include <asm/current.h>
>>
>> struct bpf_verifier_env;
>> struct perf_event;
>> @@ -742,6 +746,12 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
>> int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
>> const union bpf_attr *kattr,
>> union bpf_attr __user *uattr);
>> +
>> +static inline bool bpf_capable(int cap)
>> +{
>> + return test_bit(TASK_BPF_FLAG_PERMITTED, ¤t->bpf_flags) ||
>> + capable(cap);
>> +}
>> #else /* !CONFIG_BPF_SYSCALL */
>> static inline struct bpf_prog *bpf_prog_get(u32 ufd)
>> {
>> @@ -874,6 +884,8 @@ static inline int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
>> {
>> return -ENOTSUPP;
>> }
>> +
>> +#define bpf_capable(cap) capable((cap))
>> #endif /* CONFIG_BPF_SYSCALL */
>>
>> static inline struct bpf_prog *bpf_prog_get_type(u32 ufd,
>> diff --git a/include/linux/sched.h b/include/linux/sched.h
>> index 11837410690f..ddd33d4476c5 100644
>> --- a/include/linux/sched.h
>> +++ b/include/linux/sched.h
>> @@ -1200,6 +1200,10 @@ struct task_struct {
>> unsigned long prev_lowest_stack;
>> #endif
>>
>> +#ifdef CONFIG_BPF_SYSCALL
>> + unsigned long bpf_flags;
>> +#endif
>
> There are plenty of bits available here:
>
> /* --- cacheline 14 boundary (896 bytes) --- */
> unsigned int in_execve:1; /* 896:31 4 */
> unsigned int in_iowait:1; /* 896:30 4 */
> unsigned int restore_sigmask:1; /* 896:29 4 */
> unsigned int in_user_fault:1; /* 896:28 4 */
> unsigned int no_cgroup_migration:1; /* 896:27 4 */
> unsigned int frozen:1; /* 896:26 4 */
> unsigned int use_memdelay:1; /* 896:25 4 */
>
> /* XXX 25 bits hole, try to pack */
> /* XXX 4 bytes hole, try to pack */
>
> Given that bpf is pretty much enabled by default everywhere, I don't think we
> should waste so much space in task_struct just for this flag (pretty sure that
> task_struct is the equivalent of sk_buff that rather needs a diet). Other options
> could be to add to atomic_flags which also still has space.
Good point. Let me find a free bit for it.
>
>> /*
>> * New fields for task_struct should be added above here, so that
>> * they are included in the randomized portion of task_struct.
>> @@ -1772,6 +1776,10 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
>>
>> #endif /* CONFIG_SMP */
[...]
>> +
>> +static long bpf_dev_ioctl(struct file *filp,
>> + unsigned int ioctl, unsigned long arg)
>> +{
>> + switch (ioctl) {
>> + case BPF_DEV_IOCTL_GET_PERM:
>> + set_bit(TASK_BPF_FLAG_PERMITTED, ¤t->bpf_flags);
>> + break;
>> + case BPF_DEV_IOCTL_PUT_PERM:
>> + clear_bit(TASK_BPF_FLAG_PERMITTED, ¤t->bpf_flags);
>
> I think the get/put for uapi is a bit misleading, first thought at least for
> me is on get/put_user() when I read the name.
I am not good at naming things. What would be better names here?
>
>> + break;
>> + default:
>> + return -EINVAL;
>> + }
>> + return 0;
>> +}
>> +
>> +static const struct file_operations bpf_chardev_ops = {
>> + .unlocked_ioctl = bpf_dev_ioctl,
>> +};
>> +
>> +static struct miscdevice bpf_dev = {
>> + .minor = MISC_DYNAMIC_MINOR,
>> + .name = "bpf",
>> + .fops = &bpf_chardev_ops,
>> + .mode = 0440,
>> + .nodename = "bpf",
>
> Here's what kvm does:
>
> static struct miscdevice kvm_dev = {
> KVM_MINOR,
> "kvm",
> &kvm_chardev_ops,
> };
>
> Is there an actual reason that mode is not 0 by default in bpf case? Why
> we need to define nodename?
Based on my understanding, mode of 0440 is what we want. If we leave it
as 0, it will use default value of 0600. I guess we can just set it to
0440, as user space can change it later anyway.
I guess we really don't need nodename. I will remove it.
Thanks,
Song
Powered by blists - more mailing lists