Message-Id: <20180509210709.7201-8-joe@wand.net.nz>
Date: Wed, 9 May 2018 14:07:05 -0700
From: Joe Stringer <joe@...d.net.nz>
To: daniel@...earbox.net
Cc: netdev@...r.kernel.org, ast@...nel.org, john.fastabend@...il.com,
kafai@...com
Subject: [RFC bpf-next 07/11] bpf: Add helper to retrieve socket in BPF

This patch adds a new BPF helper function, sk_lookup(), which allows BPF
programs to find out whether there is a socket listening on this host for
a given tuple, and returns a socket pointer which the BPF program can then
access to determine, for instance, whether to forward or drop traffic.
sk_lookup() takes a reference on the socket, so when a BPF program makes
use of this function, it must subsequently pass the returned pointer into
the newly added sk_release() helper to release that reference.

By way of example, the following pseudocode would filter inbound
connections at the TC ingress hook if there is no corresponding service
listening for the traffic:

  struct bpf_sock_tuple tuple;
  struct bpf_sock *sk;

  populate_tuple(ctx, &tuple); // Extract the 5-tuple from the packet
  sk = bpf_sk_lookup(ctx, &tuple, sizeof tuple, netns, 0);
  if (!sk) {
    // Couldn't find a socket listening for this traffic. Drop.
    return TC_ACT_SHOT;
  }
  bpf_sk_release(sk, 0);
  return TC_ACT_OK;
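
Fleshing that out, here is a minimal, self-contained sketch of such a
filter as a TC classifier, written against the helper declarations this
patch adds to tools/testing/selftests/bpf/bpf_helpers.h. It is
illustrative only: the program and section names are arbitrary, it only
handles IPv4/TCP without IP options, and the netns id passed to
bpf_sk_lookup() (0 here) is merely a placeholder for a namespace id
resolvable via get_net_ns_by_id().

  #include <linux/bpf.h>
  #include <linux/if_ether.h>
  #include <linux/in.h>
  #include <linux/ip.h>
  #include <linux/pkt_cls.h>
  #include <linux/tcp.h>
  #include <sys/socket.h>
  #include "bpf_helpers.h"
  #include "bpf_endian.h"

  SEC("classifier")
  int drop_unserviced(struct __sk_buff *skb)
  {
      void *data_end = (void *)(long)skb->data_end;
      void *data = (void *)(long)skb->data;
      struct ethhdr *eth = data;
      struct bpf_sock_tuple tuple = {};
      struct bpf_sock *sk;
      struct tcphdr *tcp;
      struct iphdr *ip;

      /* Bounds check before touching any header fields. */
      if (data + sizeof(*eth) + sizeof(*ip) + sizeof(*tcp) > data_end)
          return TC_ACT_OK;
      if (eth->h_proto != bpf_htons(ETH_P_IP))
          return TC_ACT_OK;
      ip = data + sizeof(*eth);
      if (ip->protocol != IPPROTO_TCP || ip->ihl != 5)
          return TC_ACT_OK;
      tcp = (void *)ip + sizeof(*ip);

      /* Build the 5-tuple describing this packet. */
      tuple.family = AF_INET;
      tuple.proto = IPPROTO_TCP;
      tuple.saddr.ipv4 = ip->saddr;
      tuple.daddr.ipv4 = ip->daddr;
      tuple.sport = tcp->source;
      tuple.dport = tcp->dest;

      /* netns id 0 is a placeholder; pass the id of the namespace to search. */
      sk = bpf_sk_lookup(skb, &tuple, sizeof(tuple), 0, 0);
      if (!sk)
          return TC_ACT_SHOT;  /* nothing listening for this flow, drop */

      /* Every non-NULL result holds a reference that must be released. */
      bpf_sk_release(sk, 0);
      return TC_ACT_OK;
  }

  char _license[] SEC("license") = "GPL";

Releasing the reference is not optional: as the verifier comment below
describes, a program that can exit while still holding an unreleased
reference from bpf_sk_lookup() is rejected at load time.
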
Signed-off-by: Joe Stringer <joe@...d.net.nz>
---
include/uapi/linux/bpf.h | 39 +++++++++++-
kernel/bpf/verifier.c | 8 ++-
net/core/filter.c | 102 ++++++++++++++++++++++++++++++
tools/include/uapi/linux/bpf.h | 40 +++++++++++-
tools/testing/selftests/bpf/bpf_helpers.h | 7 ++
5 files changed, 193 insertions(+), 3 deletions(-)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index d615c777b573..29f38838dbca 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1828,6 +1828,25 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
+ * struct bpf_sock *bpf_sk_lookup(ctx, tuple, tuple_size, netns, flags)
+ *	Description
+ *		Look for a socket matching 'tuple'. The return value must be checked,
+ *		and if non-NULL, released via bpf_sk_release().
+ *		@ctx: pointer to ctx
+ *		@tuple: pointer to struct bpf_sock_tuple
+ *		@tuple_size: size of the tuple
+ *		@flags: flags value
+ *	Return
+ *		pointer to the socket on success, or
+ *		NULL in case of failure
+ *
+ * int bpf_sk_release(sock, flags)
+ *	Description
+ *		Release the reference held by 'sock'.
+ *		@sock: Pointer reference to release. Must have been obtained via bpf_sk_lookup().
+ *		@flags: flags value
+ *	Return
+ *		0 on success, or a negative error in case of failure.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -1898,7 +1917,9 @@ union bpf_attr {
FN(xdp_adjust_tail), \
FN(skb_get_xfrm_state), \
FN(get_stack), \
- FN(skb_load_bytes_relative),
+ FN(skb_load_bytes_relative), \
+ FN(sk_lookup), \
+ FN(sk_release),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
@@ -2060,6 +2081,22 @@ struct bpf_sock {
*/
};
+struct bpf_sock_tuple {
+	union {
+		__be32 ipv6[4];
+		__be32 ipv4;
+	} saddr;
+	union {
+		__be32 ipv6[4];
+		__be32 ipv4;
+	} daddr;
+	__be16 sport;
+	__be16 dport;
+	__u32 dst_if;
+	__u8 family;
+	__u8 proto;
+};
+
#define XDP_PACKET_HEADROOM 256
/* User return codes for XDP prog type.
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 92b9a5dc465a..579012c483e4 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -153,6 +153,12 @@ static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
* PTR_TO_MAP_VALUE, PTR_TO_SOCKET_OR_NULL becomes PTR_TO_SOCKET when the type
* passes through a NULL-check conditional. For the branch wherein the state is
* changed to CONST_IMM, the verifier releases the reference.
+ *
+ * For each helper function that allocates a reference, such as bpf_sk_lookup(),
+ * there is a corresponding release function, such as bpf_sk_release(). When
+ * a reference type passes into the release function, the verifier also releases
+ * the reference. If any unchecked or unreleased reference remains at the end of
+ * the program, the verifier rejects it.
*/
/* verifier_state + insn_idx are pushed to stack when branch is encountered */
@@ -277,7 +283,7 @@ static bool arg_type_is_refcounted(enum bpf_arg_type type)
*/
static bool is_release_function(enum bpf_func_id func_id)
{
- return false;
+ return func_id == BPF_FUNC_sk_release;
}
/* string representation of 'enum bpf_reg_type' */
diff --git a/net/core/filter.c b/net/core/filter.c
index 4c35152fb3a8..751c255d17d3 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -58,8 +58,12 @@
#include <net/busy_poll.h>
#include <net/tcp.h>
#include <net/xfrm.h>
+#include <net/udp.h>
#include <linux/bpf_trace.h>
#include <net/xdp_sock.h>
+#include <net/inet_hashtables.h>
+#include <net/inet6_hashtables.h>
+#include <net/net_namespace.h>
/**
* sk_filter_trim_cap - run a packet through a socket filter
@@ -4032,6 +4036,96 @@ static const struct bpf_func_proto bpf_skb_get_xfrm_state_proto = {
};
#endif
+struct sock *
+sk_lookup(struct net *net, struct bpf_sock_tuple *tuple) {
+	int dst_if = (int)tuple->dst_if;
+	struct in6_addr *src6;
+	struct in6_addr *dst6;
+
+	if (tuple->family == AF_INET6) {
+		src6 = (struct in6_addr *)&tuple->saddr.ipv6;
+		dst6 = (struct in6_addr *)&tuple->daddr.ipv6;
+	} else if (tuple->family != AF_INET) {
+		return ERR_PTR(-EOPNOTSUPP);
+	}
+
+	if (tuple->proto == IPPROTO_TCP) {
+		if (tuple->family == AF_INET)
+			return inet_lookup(net, &tcp_hashinfo, NULL, 0,
+					   tuple->saddr.ipv4, tuple->sport,
+					   tuple->daddr.ipv4, tuple->dport,
+					   dst_if);
+		else
+			return inet6_lookup(net, &tcp_hashinfo, NULL, 0,
+					    src6, tuple->sport,
+					    dst6, tuple->dport, dst_if);
+	} else if (tuple->proto == IPPROTO_UDP) {
+		if (tuple->family == AF_INET)
+			return udp4_lib_lookup(net, tuple->saddr.ipv4,
+					       tuple->sport, tuple->daddr.ipv4,
+					       tuple->dport, dst_if);
+		else
+			return udp6_lib_lookup(net, src6, tuple->sport,
+					       dst6, tuple->dport, dst_if);
+	} else {
+		return ERR_PTR(-EOPNOTSUPP);
+	}
+
+	return NULL;
+}
+
+BPF_CALL_5(bpf_sk_lookup, struct sk_buff *, skb,
+	   struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags)
+{
+	struct net *caller_net = dev_net(skb->dev);
+	struct sock *sk = NULL;
+	struct net *net;
+
+	/* XXX: Perform verification-time checking of tuple size? */
+	if (unlikely(len != sizeof(struct bpf_sock_tuple) || flags))
+		goto out;
+
+	net = get_net_ns_by_id(caller_net, netns_id);
+	if (unlikely(!net))
+		goto out;
+
+	sk = sk_lookup(net, tuple);
+	put_net(net);
+	if (IS_ERR_OR_NULL(sk))
+		sk = NULL;
+	else
+		sk = sk_to_full_sk(sk);
+out:
+	return (unsigned long) sk;
+}
+
+static const struct bpf_func_proto bpf_sk_lookup_proto = {
+	.func		= bpf_sk_lookup,
+	.gpl_only	= false,
+	.ret_type	= RET_PTR_TO_SOCKET_OR_NULL,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_PTR_TO_MEM,
+	.arg3_type	= ARG_CONST_SIZE,
+	.arg4_type	= ARG_ANYTHING,
+	.arg5_type	= ARG_ANYTHING,
+};
+
+BPF_CALL_2(bpf_sk_release, struct sock *, sk, u64, flags)
+{
+	sock_gen_put(sk);
+	if (unlikely(flags))
+		return -EINVAL;
+	return 0;
+}
+
+static const struct bpf_func_proto bpf_sk_release_proto = {
+	.func		= bpf_sk_release,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_SOCKET,
+	.arg2_type	= ARG_ANYTHING,
+};
+
static const struct bpf_func_proto *
bpf_base_func_proto(enum bpf_func_id func_id)
{
@@ -4181,6 +4275,10 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_skb_get_xfrm_state:
return &bpf_skb_get_xfrm_state_proto;
#endif
+	case BPF_FUNC_sk_lookup:
+		return &bpf_sk_lookup_proto;
+	case BPF_FUNC_sk_release:
+		return &bpf_sk_release_proto;
default:
return bpf_base_func_proto(func_id);
}
@@ -4292,6 +4390,10 @@ sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_get_socket_uid_proto;
case BPF_FUNC_sk_redirect_map:
return &bpf_sk_redirect_map_proto;
+	case BPF_FUNC_sk_lookup:
+		return &bpf_sk_lookup_proto;
+	case BPF_FUNC_sk_release:
+		return &bpf_sk_release_proto;
default:
return bpf_base_func_proto(func_id);
}
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index fff51c187d1e..29f38838dbca 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -117,6 +117,7 @@ enum bpf_map_type {
BPF_MAP_TYPE_DEVMAP,
BPF_MAP_TYPE_SOCKMAP,
BPF_MAP_TYPE_CPUMAP,
+ BPF_MAP_TYPE_XSKMAP,
};
enum bpf_prog_type {
@@ -1827,6 +1828,25 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
+ * struct bpf_sock *bpf_sk_lookup(ctx, tuple, tuple_size, netns, flags)
+ *	Description
+ *		Look for a socket matching 'tuple'. The return value must be checked,
+ *		and if non-NULL, released via bpf_sk_release().
+ *		@ctx: pointer to ctx
+ *		@tuple: pointer to struct bpf_sock_tuple
+ *		@tuple_size: size of the tuple
+ *		@flags: flags value
+ *	Return
+ *		pointer to the socket on success, or
+ *		NULL in case of failure
+ *
+ * int bpf_sk_release(sock, flags)
+ *	Description
+ *		Release the reference held by 'sock'.
+ *		@sock: Pointer reference to release. Must have been obtained via bpf_sk_lookup().
+ *		@flags: flags value
+ *	Return
+ *		0 on success, or a negative error in case of failure.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -1897,7 +1917,9 @@ union bpf_attr {
FN(xdp_adjust_tail), \
FN(skb_get_xfrm_state), \
FN(get_stack), \
- FN(skb_load_bytes_relative),
+ FN(skb_load_bytes_relative), \
+ FN(sk_lookup), \
+ FN(sk_release),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
@@ -2059,6 +2081,22 @@ struct bpf_sock {
*/
};
+struct bpf_sock_tuple {
+	union {
+		__be32 ipv6[4];
+		__be32 ipv4;
+	} saddr;
+	union {
+		__be32 ipv6[4];
+		__be32 ipv4;
+	} daddr;
+	__be16 sport;
+	__be16 dport;
+	__u32 dst_if;
+	__u8 family;
+	__u8 proto;
+};
+
#define XDP_PACKET_HEADROOM 256
/* User return codes for XDP prog type.
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index 265f8e0e8ada..4dc311ea0c16 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -103,6 +103,13 @@ static int (*bpf_skb_get_xfrm_state)(void *ctx, int index, void *state,
(void *) BPF_FUNC_skb_get_xfrm_state;
static int (*bpf_get_stack)(void *ctx, void *buf, int size, int flags) =
(void *) BPF_FUNC_get_stack;
+static struct bpf_sock *(*bpf_sk_lookup)(void *ctx,
+					 struct bpf_sock_tuple *tuple,
+					 int size, unsigned int netns_id,
+					 unsigned long long flags) =
+	(void *) BPF_FUNC_sk_lookup;
+static int (*bpf_sk_release)(struct bpf_sock *sk, unsigned long long flags) =
+	(void *) BPF_FUNC_sk_release;
/* llvm builtin functions that eBPF C program may use to
* emit BPF_LD_ABS and BPF_LD_IND instructions
--
2.14.1