netdev - [PATCH RFC bpf-next 6/6] bpf: Post-hooks for sys

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20180314033934.3502167-7-ast@kernel.org>
Date:   Tue, 13 Mar 2018 20:39:34 -0700
From:   Alexei Starovoitov <ast@...nel.org>
To:     <davem@...emloft.net>
CC:     <daniel@...earbox.net>, <netdev@...r.kernel.org>,
        <kernel-team@...com>
Subject: [PATCH RFC bpf-next 6/6] bpf: Post-hooks for sys_bind

From: Andrey Ignatov <rdna@...com>

"Post-hooks" are hooks that are called right before returning from
sys_bind. At this time IP and port are already allocated and no further
changes to `struct sock` can happen before returning from sys_bind but
BPF program has a chance to inspect the socket and change sys_bind
result.

Specifically it can e.g. inspect what port was allocated and if it
doesn't satisfy some policy, BPF program can force sys_bind to release
that port and return an error to user.

Another example of usage is recording the IP:port pair to some map to
use it in later calls to sys_connect. E.g. if some TCP server inside
cgroup was bound to some IP:port and then some TCP client inside same
cgroup is trying to connect to 127.0.0.1:port then BPF hook for
sys_connect can override the destination and connect application to
IP:port instead of 127.0.0.1:port. That helps forcing all applications
inside a cgroup to use desired IP and not break those applications if
they user e.g. localhost to communicate between each other.

== Implementation details ==

Post-hooks are implemented as two new prog types
`BPF_PROG_TYPE_CGROUP_INET4_POST_BIND` and
`BPF_PROG_TYPE_CGROUP_INET6_POST_BIND` and corresponding attach types
`BPF_CGROUP_INET4_POST_BIND` and `BPF_CGROUP_INET6_POST_BIND`.

Separate prog types for IPv4 and IPv6 are introduced to avoid access to
IPv6 field in `struct sock` from `inet_bind()` and to IPv4 field from
`inet6_bind()` since those fields might not make sense in such cases.

`BPF_PROG_TYPE_CGROUP_SOCK` prog type is not reused because it provides
write access to some `struct sock` fields, but socket must not be
changed in post-hooks for sys_bind.

Signed-off-by: Andrey Ignatov <rdna@...com>
Signed-off-by: Alexei Starovoitov <ast@...nel.org>
---
 include/linux/bpf-cgroup.h |  16 ++++-
 include/linux/bpf_types.h  |   2 +
 include/uapi/linux/bpf.h   |  13 ++++
 kernel/bpf/syscall.c       |  14 ++++
 kernel/bpf/verifier.c      |   2 +
 net/core/filter.c          | 170 ++++++++++++++++++++++++++++++++++++++++-----
 net/ipv4/af_inet.c         |   3 +-
 net/ipv6/af_inet6.c        |   3 +-
 8 files changed, 202 insertions(+), 21 deletions(-)

diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 6b5c25ef1482..693c542632e3 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -98,16 +98,24 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
 	__ret;								       \
 })
 
-#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk)				       \
+#define BPF_CGROUP_RUN_SK_PROG(sk, type)				       \
 ({									       \
 	int __ret = 0;							       \
 	if (cgroup_bpf_enabled) {					       \
-		__ret = __cgroup_bpf_run_filter_sk(sk,			       \
-						 BPF_CGROUP_INET_SOCK_CREATE); \
+		__ret = __cgroup_bpf_run_filter_sk(sk, type);		       \
 	}								       \
 	__ret;								       \
 })
 
+#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk)				       \
+	BPF_CGROUP_RUN_SK_PROG(sk, BPF_CGROUP_INET_SOCK_CREATE)
+
+#define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk)				       \
+	BPF_CGROUP_RUN_SK_PROG(sk, BPF_CGROUP_INET4_POST_BIND)
+
+#define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk)				       \
+	BPF_CGROUP_RUN_SK_PROG(sk, BPF_CGROUP_INET6_POST_BIND)
+
 #define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, type) 			       \
 ({									       \
 	int __ret = 0;							       \
@@ -183,6 +191,8 @@ static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; }
 #define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET4_BIND(sk, uaddr) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET6_BIND(sk, uaddr) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr) ({ 0; })
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 52a571827b9f..23a97978b544 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -10,6 +10,8 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SKB, cg_skb)
 BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK, cg_sock)
 BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_INET4_BIND, cg_inet4_bind)
 BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_INET6_BIND, cg_inet6_bind)
+BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_INET4_POST_BIND, cg_inet4_post_bind)
+BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_INET6_POST_BIND, cg_inet6_post_bind)
 BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_INET4_CONNECT, cg_inet4_connect)
 BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_INET6_CONNECT, cg_inet6_connect)
 BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_IN, lwt_inout)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 441a674f385a..7dcc75a65a97 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -137,6 +137,8 @@ enum bpf_prog_type {
 	BPF_PROG_TYPE_CGROUP_INET6_BIND,
 	BPF_PROG_TYPE_CGROUP_INET4_CONNECT,
 	BPF_PROG_TYPE_CGROUP_INET6_CONNECT,
+	BPF_PROG_TYPE_CGROUP_INET4_POST_BIND,
+	BPF_PROG_TYPE_CGROUP_INET6_POST_BIND,
 };
 
 enum bpf_attach_type {
@@ -151,6 +153,8 @@ enum bpf_attach_type {
 	BPF_CGROUP_INET6_BIND,
 	BPF_CGROUP_INET4_CONNECT,
 	BPF_CGROUP_INET6_CONNECT,
+	BPF_CGROUP_INET4_POST_BIND,
+	BPF_CGROUP_INET6_POST_BIND,
 	__MAX_BPF_ATTACH_TYPE
 };
 
@@ -903,6 +907,15 @@ struct bpf_sock {
 	__u32 protocol;
 	__u32 mark;
 	__u32 priority;
+	__u32 src_ip4;		/* Allows 1,2,4-byte read.
+				 * Stored in network byte order.
+				 */
+	__u32 src_ip6[4];	/* Allows 1,2,4-byte read.
+				 * Stored in network byte order.
+				 */
+	__u32 src_port;		/* Allows 4-byte read.
+				 * Stored in network byte order
+				 */
 };
 
 #define XDP_PACKET_HEADROOM 256
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 145de3332e32..2eb941dacbc5 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1382,6 +1382,12 @@ static int bpf_prog_attach(const union bpf_attr *attr)
 	case BPF_CGROUP_INET6_BIND:
 		ptype = BPF_PROG_TYPE_CGROUP_INET6_BIND;
 		break;
+	case BPF_CGROUP_INET4_POST_BIND:
+		ptype = BPF_PROG_TYPE_CGROUP_INET4_POST_BIND;
+		break;
+	case BPF_CGROUP_INET6_POST_BIND:
+		ptype = BPF_PROG_TYPE_CGROUP_INET6_POST_BIND;
+		break;
 	case BPF_CGROUP_INET4_CONNECT:
 		ptype = BPF_PROG_TYPE_CGROUP_INET4_CONNECT;
 		break;
@@ -1449,6 +1455,12 @@ static int bpf_prog_detach(const union bpf_attr *attr)
 	case BPF_CGROUP_INET6_BIND:
 		ptype = BPF_PROG_TYPE_CGROUP_INET6_BIND;
 		break;
+	case BPF_CGROUP_INET4_POST_BIND:
+		ptype = BPF_PROG_TYPE_CGROUP_INET4_POST_BIND;
+		break;
+	case BPF_CGROUP_INET6_POST_BIND:
+		ptype = BPF_PROG_TYPE_CGROUP_INET6_POST_BIND;
+		break;
 	case BPF_CGROUP_INET4_CONNECT:
 		ptype = BPF_PROG_TYPE_CGROUP_INET4_CONNECT;
 		break;
@@ -1504,6 +1516,8 @@ static int bpf_prog_query(const union bpf_attr *attr,
 	case BPF_CGROUP_INET_SOCK_CREATE:
 	case BPF_CGROUP_INET4_BIND:
 	case BPF_CGROUP_INET6_BIND:
+	case BPF_CGROUP_INET4_POST_BIND:
+	case BPF_CGROUP_INET6_POST_BIND:
 	case BPF_CGROUP_INET4_CONNECT:
 	case BPF_CGROUP_INET6_CONNECT:
 	case BPF_CGROUP_SOCK_OPS:
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index cda7830a2c1b..84faec85fe3e 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -3874,6 +3874,8 @@ static int check_return_code(struct bpf_verifier_env *env)
 	case BPF_PROG_TYPE_CGROUP_SOCK:
 	case BPF_PROG_TYPE_CGROUP_INET4_BIND:
 	case BPF_PROG_TYPE_CGROUP_INET6_BIND:
+	case BPF_PROG_TYPE_CGROUP_INET4_POST_BIND:
+	case BPF_PROG_TYPE_CGROUP_INET6_POST_BIND:
 	case BPF_PROG_TYPE_CGROUP_INET4_CONNECT:
 	case BPF_PROG_TYPE_CGROUP_INET6_CONNECT:
 	case BPF_PROG_TYPE_SOCK_OPS:
diff --git a/net/core/filter.c b/net/core/filter.c
index 916195b86a23..e27196248c10 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3840,6 +3840,62 @@ static bool sock_filter_is_valid_access(int off, int size,
 	return true;
 }
 
+static bool __sock_is_valid_access(unsigned short ctx_family, int off, int size,
+				   enum bpf_access_type type,
+				   struct bpf_insn_access_aux *info)
+{
+	const int size_default = sizeof(__u32);
+	unsigned short requested_family = 0;
+
+	if (off < 0 || off >= sizeof(struct bpf_sock))
+		return false;
+	if (off % size != 0)
+		return false;
+	if (type != BPF_READ)
+		return false;
+
+	switch (off) {
+	case bpf_ctx_range(struct bpf_sock, src_ip4):
+		requested_family = AF_INET;
+		/* FALLTHROUGH */
+	case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
+		if (!requested_family)
+			requested_family = AF_INET6;
+		/* Disallow access to IPv6 fields from IPv4 contex and vise
+		 * versa.
+		 */
+		if (requested_family != ctx_family)
+			return false;
+		bpf_ctx_record_field_size(info, size_default);
+		if (!bpf_ctx_narrow_access_ok(off, size, size_default))
+			return false;
+		break;
+	case bpf_ctx_range(struct bpf_sock, family):
+	case bpf_ctx_range(struct bpf_sock, type):
+	case bpf_ctx_range(struct bpf_sock, protocol):
+	case bpf_ctx_range(struct bpf_sock, src_port):
+		if (size != size_default)
+			return false;
+		break;
+	default:
+		return false;
+	}
+
+	return true;
+}
+
+static bool sock4_is_valid_access(int off, int size, enum bpf_access_type type,
+				  struct bpf_insn_access_aux *info)
+{
+	return __sock_is_valid_access(AF_INET, off, size, type, info);
+}
+
+static bool sock6_is_valid_access(int off, int size, enum bpf_access_type type,
+				  struct bpf_insn_access_aux *info)
+{
+	return __sock_is_valid_access(AF_INET6, off, size, type, info);
+}
+
 static int bpf_unclone_prologue(struct bpf_insn *insn_buf, bool direct_write,
 				const struct bpf_prog *prog, int drop_verdict)
 {
@@ -4406,6 +4462,40 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
 	return insn - insn_buf;
 }
 
+static u32 __sock_convert_ctx_access(enum bpf_access_type type,
+				     const struct bpf_insn *si,
+				     struct bpf_insn *insn_buf,
+				     struct bpf_prog *prog, u32 *target_size)
+{
+	struct bpf_insn *insn = insn_buf;
+
+	switch (si->off) {
+	case offsetof(struct bpf_sock, family):
+		BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_family) != 2);
+
+		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
+				      offsetof(struct sock, sk_family));
+		break;
+
+	case offsetof(struct bpf_sock, type):
+		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
+				      offsetof(struct sock, __sk_flags_offset));
+		*insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_TYPE_MASK);
+		*insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_TYPE_SHIFT);
+		break;
+
+	case offsetof(struct bpf_sock, protocol):
+		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
+				      offsetof(struct sock, __sk_flags_offset));
+		*insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK);
+		*insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg,
+					SK_FL_PROTO_SHIFT);
+		break;
+	}
+
+	return insn - insn_buf;
+}
+
 static u32 sock_filter_convert_ctx_access(enum bpf_access_type type,
 					  const struct bpf_insn *si,
 					  struct bpf_insn *insn_buf,
@@ -4447,26 +4537,56 @@ static u32 sock_filter_convert_ctx_access(enum bpf_access_type type,
 				      offsetof(struct sock, sk_priority));
 		break;
 
-	case offsetof(struct bpf_sock, family):
-		BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_family) != 2);
+	default:
+		return __sock_convert_ctx_access(type, si, insn_buf, prog,
+						 target_size);
+	}
 
-		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
-				      offsetof(struct sock, sk_family));
-		break;
+	return insn - insn_buf;
+}
 
-	case offsetof(struct bpf_sock, type):
-		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
-				      offsetof(struct sock, __sk_flags_offset));
-		*insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_TYPE_MASK);
-		*insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_TYPE_SHIFT);
-		break;
+static u32 sock_convert_ctx_access(enum bpf_access_type type,
+				   const struct bpf_insn *si,
+				   struct bpf_insn *insn_buf,
+				   struct bpf_prog *prog,
+				   u32 *target_size)
+{
+	struct bpf_insn *insn = insn_buf;
+	int off;
 
-	case offsetof(struct bpf_sock, protocol):
-		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
-				      offsetof(struct sock, __sk_flags_offset));
-		*insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK);
-		*insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_PROTO_SHIFT);
+	switch (si->off) {
+	case offsetof(struct bpf_sock, src_ip4):
+		*insn++ = BPF_LDX_MEM(
+			BPF_SIZE(si->code), si->dst_reg, si->src_reg,
+			bpf_target_off(struct sock_common, skc_rcv_saddr,
+				       FIELD_SIZEOF(struct sock_common,
+						    skc_rcv_saddr),
+				       target_size));
 		break;
+	case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
+		off = si->off;
+		off -= offsetof(struct bpf_sock, src_ip6[0]);
+		*insn++ = BPF_LDX_MEM(
+			BPF_SIZE(si->code), si->dst_reg, si->src_reg,
+			bpf_target_off(
+				struct sock_common,
+				skc_v6_rcv_saddr.s6_addr32[0],
+				FIELD_SIZEOF(struct sock_common,
+					     skc_v6_rcv_saddr.s6_addr32[0]),
+				target_size) + off);
+		break;
+	case offsetof(struct bpf_sock, src_port):
+		*insn++ = BPF_LDX_MEM(
+			BPF_FIELD_SIZEOF(struct sock_common, skc_num),
+			si->dst_reg, si->src_reg,
+			bpf_target_off(struct sock_common, skc_num,
+				       FIELD_SIZEOF(struct sock_common,
+						    skc_num),
+				       target_size));
+		break;
+	default:
+		return __sock_convert_ctx_access(type, si, insn_buf, prog,
+						 target_size);
 	}
 
 	return insn - insn_buf;
@@ -5122,6 +5242,24 @@ const struct bpf_verifier_ops cg_sock_verifier_ops = {
 const struct bpf_prog_ops cg_sock_prog_ops = {
 };
 
+const struct bpf_verifier_ops cg_inet4_post_bind_verifier_ops = {
+	.get_func_proto		= sock_filter_func_proto,
+	.is_valid_access	= sock4_is_valid_access,
+	.convert_ctx_access	= sock_convert_ctx_access,
+};
+
+const struct bpf_prog_ops cg_inet4_post_bind_prog_ops = {
+};
+
+const struct bpf_verifier_ops cg_inet6_post_bind_verifier_ops = {
+	.get_func_proto		= sock_filter_func_proto,
+	.is_valid_access	= sock6_is_valid_access,
+	.convert_ctx_access	= sock_convert_ctx_access,
+};
+
+const struct bpf_prog_ops cg_inet6_post_bind_prog_ops = {
+};
+
 const struct bpf_verifier_ops cg_inet4_bind_verifier_ops = {
 	.get_func_proto		= inet_bind_func_proto,
 	.is_valid_access	= sock_addr4_is_valid_access,
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 488fe26ac8e5..28e2e7fdd5b1 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -521,7 +521,8 @@ int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
 	/* Make sure we are allowed to bind here. */
 	if ((snum || !(inet->bind_address_no_port ||
 		       force_bind_address_no_port)) &&
-	    sk->sk_prot->get_port(sk, snum)) {
+	    (sk->sk_prot->get_port(sk, snum) ||
+	     BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk))) {
 		inet->inet_saddr = inet->inet_rcv_saddr = 0;
 		err = -EADDRINUSE;
 		goto out_release_sock;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 13110bee5c14..473cc55a3a7d 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -414,7 +414,8 @@ int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
 	/* Make sure we are allowed to bind here. */
 	if ((snum || !(inet->bind_address_no_port ||
 		       force_bind_address_no_port)) &&
-	    sk->sk_prot->get_port(sk, snum)) {
+	    (sk->sk_prot->get_port(sk, snum) ||
+	     BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk))) {
 		sk->sk_ipv6only = saved_ipv6only;
 		inet_reset_saddr(sk);
 		err = -EADDRINUSE;
-- 
2.9.5