linux-kernel - Re: [PATCH 1/6] ebpf: add a seccomp program type

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAGXu5j+9YvL6XzX=x8krZbdJXFpe-3FuMu=hAZX9Ew=hYyuK7Q@mail.gmail.com>
Date:	Fri, 4 Sep 2015 13:34:12 -0700
From:	Kees Cook <keescook@...omium.org>
To:	Tycho Andersen <tycho.andersen@...onical.com>
Cc:	Alexei Starovoitov <ast@...nel.org>,
	Will Drewry <wad@...omium.org>,
	Oleg Nesterov <oleg@...hat.com>,
	Andy Lutomirski <luto@...capital.net>,
	Pavel Emelyanov <xemul@...allels.com>,
	"Serge E. Hallyn" <serge.hallyn@...ntu.com>,
	Daniel Borkmann <daniel@...earbox.net>,
	LKML <linux-kernel@...r.kernel.org>,
	Network Development <netdev@...r.kernel.org>
Subject: Re: [PATCH 1/6] ebpf: add a seccomp program type

On Fri, Sep 4, 2015 at 9:04 AM, Tycho Andersen
<tycho.andersen@...onical.com> wrote:
> seccomp uses eBPF as its underlying storage and execution format, and eBPF
> has features that seccomp would like to make use of in the future. This
> patch adds a formal seccomp type to the eBPF verifier.
>
> The current implementation of the seccomp eBPF type is very limited, and
> doesn't support some interesting features (notably, maps) of eBPF. However,
> the primary motivation for this patchset is to enable checkpoint/restore
> for seccomp filters later in the series, so this limited feature set is ok
> for now.
>
> Signed-off-by: Tycho Andersen <tycho.andersen@...onical.com>
> CC: Kees Cook <keescook@...omium.org>
> CC: Will Drewry <wad@...omium.org>
> CC: Oleg Nesterov <oleg@...hat.com>
> CC: Andy Lutomirski <luto@...capital.net>
> CC: Pavel Emelyanov <xemul@...allels.com>
> CC: Serge E. Hallyn <serge.hallyn@...ntu.com>
> CC: Alexei Starovoitov <ast@...nel.org>
> CC: Daniel Borkmann <daniel@...earbox.net>
> ---
>  include/uapi/linux/bpf.h |  1 +
>  net/core/filter.c        | 95 ++++++++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 96 insertions(+)
>
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 29ef6f9..79b825a 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -122,6 +122,7 @@ enum bpf_prog_type {
>         BPF_PROG_TYPE_KPROBE,
>         BPF_PROG_TYPE_SCHED_CLS,
>         BPF_PROG_TYPE_SCHED_ACT,
> +       BPF_PROG_TYPE_SECCOMP,
>  };
>
>  #define BPF_PSEUDO_MAP_FD      1
> diff --git a/net/core/filter.c b/net/core/filter.c
> index be3098f..ed339fa 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -1466,6 +1466,39 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
>         }
>  }
>
> +static const struct bpf_func_proto *
> +seccomp_func_proto(enum bpf_func_id func_id)
> +{
> +       /* Right now seccomp eBPF loading doesn't support maps; seccomp filters
> +        * are considered to be read-only after they're installed, so map fds
> +        * probably need to be invalidated when a seccomp filter with maps is
> +        * installed.
> +        *
> +        * The rest of these might be reasonable to call from seccomp, so we
> +        * export them.
> +        */
> +       switch (func_id) {
> +       case BPF_FUNC_ktime_get_ns:
> +               return &bpf_ktime_get_ns_proto;
> +       case BPF_FUNC_trace_printk:
> +               return bpf_get_trace_printk_proto();
> +       case BPF_FUNC_get_prandom_u32:
> +               return &bpf_get_prandom_u32_proto;
> +       case BPF_FUNC_get_smp_processor_id:
> +               return &bpf_get_smp_processor_id_proto;
> +       case BPF_FUNC_tail_call:
> +               return &bpf_tail_call_proto;
> +       case BPF_FUNC_get_current_pid_tgid:
> +               return &bpf_get_current_pid_tgid_proto;
> +       case BPF_FUNC_get_current_uid_gid:
> +               return &bpf_get_current_uid_gid_proto;
> +       case BPF_FUNC_get_current_comm:
> +               return &bpf_get_current_comm_proto;
> +       default:
> +               return NULL;
> +       }
> +}

While this list is probably fine, I don't want to mix the addition of
eBPF functions to the seccomp ABI with the CRIU changes. No function
calls are currently possible and it should stay that way.

I was expecting to see a validator, similar to the existing BPF
validator that is called when creating seccomp filters currently. Can
we add a similar validator for the new BPF_PROG_TYPE_SECCOMP?

-Kees

> +
>  static bool __is_valid_access(int off, int size, enum bpf_access_type type)
>  {
>         /* check bounds */
> @@ -1516,6 +1549,17 @@ static bool tc_cls_act_is_valid_access(int off, int size,
>         return __is_valid_access(off, size, type);
>  }
>
> +static bool seccomp_is_valid_access(int off, int size,
> +                                   enum bpf_access_type type)
> +{
> +       if (type == BPF_WRITE)
> +               return false;
> +
> +       if (off < 0 || off >= sizeof(struct seccomp_data) || off & 3)
> +               return false;
> +
> +       return true;
> +}
>  static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg,
>                                       int src_reg, int ctx_off,
>                                       struct bpf_insn *insn_buf)
> @@ -1630,6 +1674,45 @@ static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg,
>         return insn - insn_buf;
>  }
>
> +static u32 seccomp_convert_ctx_access(enum bpf_access_type type, int dst_reg,
> +                                     int src_reg, int ctx_off,
> +                                     struct bpf_insn *insn_buf)
> +{
> +       struct bpf_insn *insn = insn_buf;
> +
> +       switch (ctx_off) {
> +       case offsetof(struct seccomp_data, nr):
> +               BUILD_BUG_ON(FIELD_SIZEOF(struct seccomp_data, nr) != 4);
> +
> +               *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, ctx_off);
> +               break;
> +
> +       case offsetof(struct seccomp_data, arch):
> +               BUILD_BUG_ON(FIELD_SIZEOF(struct seccomp_data, arch) != 4);
> +
> +               *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, ctx_off);
> +               break;
> +
> +       case offsetof(struct seccomp_data, instruction_pointer):
> +               BUILD_BUG_ON(FIELD_SIZEOF(struct seccomp_data,
> +                                         instruction_pointer) != 8);
> +
> +               *insn++ = BPF_LDX_MEM(BPF_DW, dst_reg, src_reg, ctx_off);
> +               break;
> +
> +       default:
> +               if (ctx_off & 7 ||
> +                   ctx_off < offsetof(struct seccomp_data, args))
> +                       return -EINVAL;
> +
> +               BUILD_BUG_ON(FIELD_SIZEOF(struct seccomp_data, args[0]) != 8);
> +
> +               *insn++ = BPF_LDX_MEM(BPF_DW, dst_reg, src_reg, ctx_off);
> +       }
> +
> +       return insn - insn_buf;
> +}
> +
>  static const struct bpf_verifier_ops sk_filter_ops = {
>         .get_func_proto = sk_filter_func_proto,
>         .is_valid_access = sk_filter_is_valid_access,
> @@ -1642,6 +1725,12 @@ static const struct bpf_verifier_ops tc_cls_act_ops = {
>         .convert_ctx_access = bpf_net_convert_ctx_access,
>  };
>
> +static const struct bpf_verifier_ops seccomp_ops = {
> +       .get_func_proto = seccomp_func_proto,
> +       .is_valid_access = seccomp_is_valid_access,
> +       .convert_ctx_access = seccomp_convert_ctx_access,
> +};
> +
>  static struct bpf_prog_type_list sk_filter_type __read_mostly = {
>         .ops = &sk_filter_ops,
>         .type = BPF_PROG_TYPE_SOCKET_FILTER,
> @@ -1657,11 +1746,17 @@ static struct bpf_prog_type_list sched_act_type __read_mostly = {
>         .type = BPF_PROG_TYPE_SCHED_ACT,
>  };
>
> +static struct bpf_prog_type_list seccomp_type __read_mostly = {
> +       .ops = &seccomp_ops,
> +       .type = BPF_PROG_TYPE_SECCOMP,
> +};
> +
>  static int __init register_sk_filter_ops(void)
>  {
>         bpf_register_prog_type(&sk_filter_type);
>         bpf_register_prog_type(&sched_cls_type);
>         bpf_register_prog_type(&sched_act_type);
> +       bpf_register_prog_type(&seccomp_type);
>
>         return 0;
>  }
> --
> 2.1.4
>



-- 
Kees Cook
Chrome OS Security
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/