linux-kernel - Re: [PATCH v2 2/5] seccomp, bpf: Introduce SECCOMP_LOAD

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <0df30939-1ba1-5703-58cc-54058fbb1df5@iogearbox.net>
Date:   Mon, 16 Oct 2023 14:44:34 +0200
From:   Daniel Borkmann <daniel@...earbox.net>
To:     Hengqi Chen <hengqi.chen@...il.com>, linux-kernel@...r.kernel.org,
        bpf@...r.kernel.org
Cc:     keescook@...omium.org, ast@...nel.org, andrii@...nel.org,
        luto@...capital.net, wad@...omium.org, alexyonghe@...cent.com
Subject: Re: [PATCH v2 2/5] seccomp, bpf: Introduce SECCOMP_LOAD_FILTER
 operation

On 10/16/23 1:29 AM, Hengqi Chen wrote:
> This patch adds a new operation named SECCOMP_LOAD_FILTER.
> It accepts a sock_fprog the same as SECCOMP_SET_MODE_FILTER
> but only performs the loading process. If succeed, return a
> new fd associated with the JITed BPF program (the filter).
> The filter can then be pinned to bpffs using the returned
> fd and reused for different processes. To distinguish the
> filter from other BPF progs, BPF_PROG_TYPE_SECCOMP is added.
> 
> Signed-off-by: Hengqi Chen <hengqi.chen@...il.com>
> ---
>   include/uapi/linux/bpf.h       |  1 +
>   include/uapi/linux/seccomp.h   |  1 +
>   kernel/seccomp.c               | 43 ++++++++++++++++++++++++++++++++++
>   tools/include/uapi/linux/bpf.h |  1 +
>   4 files changed, 46 insertions(+)
> 
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 7ba61b75bc0e..61c80ffb1724 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -995,6 +995,7 @@ enum bpf_prog_type {
>   	BPF_PROG_TYPE_SK_LOOKUP,
>   	BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */
>   	BPF_PROG_TYPE_NETFILTER,
> +	BPF_PROG_TYPE_SECCOMP,

Please don't extend UAPI surface if this is not reachable/usable from user
space anyway.

>   enum bpf_attach_type {
> diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
> index dbfc9b37fcae..ee2c83697810 100644
> --- a/include/uapi/linux/seccomp.h
> +++ b/include/uapi/linux/seccomp.h
> @@ -16,6 +16,7 @@
>   #define SECCOMP_SET_MODE_FILTER		1
>   #define SECCOMP_GET_ACTION_AVAIL	2
>   #define SECCOMP_GET_NOTIF_SIZES		3
> +#define SECCOMP_LOAD_FILTER		4
>   
>   /* Valid flags for SECCOMP_SET_MODE_FILTER */
>   #define SECCOMP_FILTER_FLAG_TSYNC		(1UL << 0)
> diff --git a/kernel/seccomp.c b/kernel/seccomp.c
> index faf84fc892eb..c9f6a19f7a4e 100644
> --- a/kernel/seccomp.c
> +++ b/kernel/seccomp.c
> @@ -17,6 +17,7 @@
>   
>   #include <linux/refcount.h>
>   #include <linux/audit.h>
> +#include <linux/bpf.h>
>   #include <linux/compat.h>
>   #include <linux/coredump.h>
>   #include <linux/kmemleak.h>
> @@ -25,6 +26,7 @@
>   #include <linux/sched.h>
>   #include <linux/sched/task_stack.h>
>   #include <linux/seccomp.h>
> +#include <linux/security.h>
>   #include <linux/slab.h>
>   #include <linux/syscalls.h>
>   #include <linux/sysctl.h>
> @@ -2032,12 +2034,48 @@ static long seccomp_set_mode_filter(unsigned int flags,
>   	seccomp_filter_free(prepared);
>   	return ret;
>   }
> +
> +static long seccomp_load_filter(const char __user *filter)
> +{
> +	struct sock_fprog fprog;
> +	struct bpf_prog *prog;
> +	int ret;
> +
> +	ret = seccomp_copy_user_filter(filter, &fprog);
> +	if (ret)
> +		return ret;
> +
> +	ret = seccomp_prepare_prog(&prog, &fprog);
> +	if (ret)
> +		return ret;
> +
> +	ret = security_bpf_prog_alloc(prog->aux);
> +	if (ret) {
> +		bpf_prog_free(prog);
> +		return ret;
> +	}
> +
> +	prog->aux->user = get_current_user();
> +	atomic64_set(&prog->aux->refcnt, 1);
> +	prog->type = BPF_PROG_TYPE_SECCOMP;
> +
> +	ret = bpf_prog_new_fd(prog);
> +	if (ret < 0)
> +		bpf_prog_put(prog);

My bigger concern here is that bpf_prog_new_fd() is only used by eBPF (not cBPF).

Then you get an 'eBPF'-like fd back to user space which you can pass to various
other bpf(2) commands like BPF_OBJ_GET_INFO_BY_FD etc which all have the assumption
that this is a proper looking eBPF prog fd.

There may be breakage/undefined behavior in subtle ways.

I would suggest two potential alternatives :

1) Build a seccomp-specific fd via anon_inode_getfd() so that BPF side does not
    confuse it with bpf_prog_fops and therefore does not recognize it in bpf(2)
    as a prog fd.

2) Extend seccomp where proper eBPF could be supported.

If option 2) is not realistic (where you would get this out of the box), then I
think 1) could be however.

> +	return ret;
> +}
>   #else
>   static inline long seccomp_set_mode_filter(unsigned int flags,
>   					   const char __user *filter)
>   {
>   	return -EINVAL;
>   }
> +
> +static inline long seccomp_load_filter(const char __user *filter)
> +{
> +	return -EINVAL;
> +}
>   #endif
>   
>   static long seccomp_get_action_avail(const char __user *uaction)
> @@ -2099,6 +2137,11 @@ static long do_seccomp(unsigned int op, unsigned int flags,
>   			return -EINVAL;
>   
>   		return seccomp_get_notif_sizes(uargs);
> +	case SECCOMP_LOAD_FILTER:
> +		if (flags != 0)
> +			return -EINVAL;
> +
> +		return seccomp_load_filter(uargs);
>   	default:
>   		return -EINVAL;
>   	}
> diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
> index 7ba61b75bc0e..61c80ffb1724 100644
> --- a/tools/include/uapi/linux/bpf.h
> +++ b/tools/include/uapi/linux/bpf.h
> @@ -995,6 +995,7 @@ enum bpf_prog_type {
>   	BPF_PROG_TYPE_SK_LOOKUP,
>   	BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */
>   	BPF_PROG_TYPE_NETFILTER,
> +	BPF_PROG_TYPE_SECCOMP,
>   };
>   
>   enum bpf_attach_type {
>