[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20211019144655.3483197-8-maximmi@nvidia.com>
Date: Tue, 19 Oct 2021 17:46:52 +0300
From: Maxim Mikityanskiy <maximmi@...dia.com>
To: Alexei Starovoitov <ast@...nel.org>,
Daniel Borkmann <daniel@...earbox.net>,
Andrii Nakryiko <andrii@...nel.org>,
Martin KaFai Lau <kafai@...com>,
Song Liu <songliubraving@...com>, Yonghong Song <yhs@...com>,
John Fastabend <john.fastabend@...il.com>,
KP Singh <kpsingh@...nel.org>
CC: Eric Dumazet <edumazet@...gle.com>,
"David S. Miller" <davem@...emloft.net>,
Jakub Kicinski <kuba@...nel.org>,
Hideaki YOSHIFUJI <yoshfuji@...ux-ipv6.org>,
David Ahern <dsahern@...nel.org>,
"Jesper Dangaard Brouer" <hawk@...nel.org>,
Nathan Chancellor <nathan@...nel.org>,
"Nick Desaulniers" <ndesaulniers@...gle.com>,
Brendan Jackman <jackmanb@...gle.com>,
Florent Revest <revest@...omium.org>,
Joe Stringer <joe@...ium.io>,
"Lorenz Bauer" <lmb@...udflare.com>,
Tariq Toukan <tariqt@...dia.com>, <netdev@...r.kernel.org>,
<bpf@...r.kernel.org>, <clang-built-linux@...glegroups.com>,
Maxim Mikityanskiy <maximmi@...dia.com>
Subject: [PATCH bpf-next 07/10] bpf: Add helpers to query conntrack info
The new helpers (bpf_ct_lookup_tcp and bpf_ct_lookup_udp) allow querying
connection tracking information of TCP and UDP connections based on
source and destination IP address and port. The helpers return a pointer
to struct nf_conn (if the conntrack entry was found), which needs to be
released with bpf_ct_release.
Signed-off-by: Maxim Mikityanskiy <maximmi@...dia.com>
Reviewed-by: Tariq Toukan <tariqt@...dia.com>
---
include/uapi/linux/bpf.h | 81 +++++++++++++
kernel/bpf/verifier.c | 9 +-
net/core/filter.c | 205 +++++++++++++++++++++++++++++++++
tools/include/uapi/linux/bpf.h | 81 +++++++++++++
4 files changed, 373 insertions(+), 3 deletions(-)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index a10a44c4f79b..883de3f1bb8b 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -4925,6 +4925,79 @@ union bpf_attr {
* Return
* The number of bytes written to the buffer, or a negative error
* in case of failure.
+ *
+ * struct bpf_nf_conn *bpf_ct_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 *flags_err)
+ * Description
+ * Look for conntrack info for a TCP connection matching *tuple*,
+ * optionally in a child network namespace *netns*.
+ *
+ * The *flags_err* argument is used as an input parameter for flags
+ * and output parameter for the error code. The flags can be a
+ * combination of one or more of the following values:
+ *
+ * **BPF_F_CT_DIR_REPLY**
+ * When set, the conntrack direction is IP_CT_DIR_REPLY,
+ * otherwise IP_CT_DIR_ORIGINAL.
+ *
+ * If the function returns **NULL**, *flags_err* is set to the
+ * negative value of one of these error codes:
+ *
+ * **EAFNOSUPPORT**
+ * *tuple_size* doesn't match supported address families
+ * (AF_INET; AF_INET6 when CONFIG_IPV6 is enabled).
+ *
+ * **EINVAL**
+ * Input arguments are not valid.
+ *
+ * **ENOENT**
+ * Connection tracking entry for *tuple* wasn't found.
+ *
+ * This helper is available only if the kernel was compiled with
+ * **CONFIG_NF_CONNTRACK** configuration option as built-in.
+ * Return
+ * A pointer to **struct bpf_nf_conn** for the matching conntrack
+ * entry, or **NULL** in case of failure or if there is no conntrack
+ * entry for this tuple. A non-**NULL** result must be released
+ * with **bpf_ct_release**\ ().
+ *
+ * struct bpf_nf_conn *bpf_ct_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 *flags_err)
+ * Description
+ * Look for conntrack info for a UDP connection matching *tuple*,
+ * optionally in a child network namespace *netns*.
+ *
+ * The *flags_err* argument is used as an input parameter for flags
+ * and output parameter for the error code. The flags can be a
+ * combination of one or more of the following values:
+ *
+ * **BPF_F_CT_DIR_REPLY**
+ * When set, the conntrack direction is IP_CT_DIR_REPLY,
+ * otherwise IP_CT_DIR_ORIGINAL.
+ *
+ * If the function returns **NULL**, *flags_err* is set to the
+ * negative value of one of these error codes:
+ *
+ * **EAFNOSUPPORT**
+ * *tuple_size* doesn't match supported address families
+ * (AF_INET; AF_INET6 when CONFIG_IPV6 is enabled).
+ *
+ * **EINVAL**
+ * Input arguments are not valid.
+ *
+ * **ENOENT**
+ * Connection tracking entry for *tuple* wasn't found.
+ *
+ * This helper is available only if the kernel was compiled with
+ * **CONFIG_NF_CONNTRACK** configuration option as built-in.
+ * Return
+ * A pointer to **struct bpf_nf_conn** for the matching conntrack
+ * entry, or **NULL** in case of failure or if there is no conntrack
+ * entry for this tuple. A non-**NULL** result must be released
+ * with **bpf_ct_release**\ ().
+ *
+ * long bpf_ct_release(void *ct)
+ * Description
+ * Release the reference held by *ct*. *ct* must be a non-**NULL**
+ * pointer that was returned from **bpf_ct_lookup_xxx**\ ().
+ * Return
+ * 0 on success, or a negative error in case of failure.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -5105,6 +5178,9 @@ union bpf_attr {
FN(task_pt_regs), \
FN(get_branch_snapshot), \
FN(trace_vprintk), \
+ FN(ct_lookup_tcp), \
+ FN(ct_lookup_udp), \
+ FN(ct_release), \
/* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
@@ -5288,6 +5364,11 @@ enum {
BPF_F_EXCLUDE_INGRESS = (1ULL << 4),
};
+/* Flags for bpf_ct_lookup_{tcp,udp} helpers, passed in through *flags_err*. */
+enum {
+ BPF_F_CT_DIR_REPLY = (1ULL << 0), /* match the IP_CT_DIR_REPLY tuple */
+};
+
#define __bpf_md_ptr(type, name) \
union { \
type name; \
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 6eafef35e247..23e2a23ca9c4 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -506,7 +506,8 @@ static bool is_release_function(enum bpf_func_id func_id)
{
return func_id == BPF_FUNC_sk_release ||
func_id == BPF_FUNC_ringbuf_submit ||
- func_id == BPF_FUNC_ringbuf_discard;
+ func_id == BPF_FUNC_ringbuf_discard ||
+ func_id == BPF_FUNC_ct_release;
}
static bool may_be_acquire_function(enum bpf_func_id func_id)
@@ -515,7 +516,9 @@ static bool may_be_acquire_function(enum bpf_func_id func_id)
func_id == BPF_FUNC_sk_lookup_udp ||
func_id == BPF_FUNC_skc_lookup_tcp ||
func_id == BPF_FUNC_map_lookup_elem ||
- func_id == BPF_FUNC_ringbuf_reserve;
+ func_id == BPF_FUNC_ringbuf_reserve ||
+ func_id == BPF_FUNC_ct_lookup_tcp ||
+ func_id == BPF_FUNC_ct_lookup_udp;
}
static bool is_acquire_function(enum bpf_func_id func_id,
@@ -526,7 +529,9 @@ static bool is_acquire_function(enum bpf_func_id func_id,
if (func_id == BPF_FUNC_sk_lookup_tcp ||
func_id == BPF_FUNC_sk_lookup_udp ||
func_id == BPF_FUNC_skc_lookup_tcp ||
- func_id == BPF_FUNC_ringbuf_reserve)
+ func_id == BPF_FUNC_ringbuf_reserve ||
+ func_id == BPF_FUNC_ct_lookup_tcp ||
+ func_id == BPF_FUNC_ct_lookup_udp)
return true;
if (func_id == BPF_FUNC_map_lookup_elem &&
diff --git a/net/core/filter.c b/net/core/filter.c
index d2d07ccae599..f913851c97f7 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -79,6 +79,7 @@
#include <net/tls.h>
#include <net/xdp.h>
#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_core.h>
static const struct bpf_func_proto *
bpf_sk_base_func_proto(enum bpf_func_id func_id);
@@ -7096,6 +7097,194 @@ static const struct bpf_func_proto bpf_sock_ops_reserve_hdr_opt_proto = {
.arg3_type = ARG_ANYTHING,
};
+#if IS_BUILTIN(CONFIG_NF_CONNTRACK)
+/* Common backend of the bpf_ct_lookup_{tcp,udp} helpers.
+ *
+ * Builds a conntrack tuple from @tuple (IPv4 or, with CONFIG_IPV6, IPv6,
+ * selected by @tuple_len), @protonum and the direction requested in @flags,
+ * then looks it up in the default zone of either @caller_net or, when
+ * (s32)@netns_id is non-negative, the namespace with that id.
+ *
+ * Returns the matching nf_conn, or an ERR_PTR():
+ *   -EINVAL        unknown bits in @flags, or bad/unknown @netns_id
+ *   -EAFNOSUPPORT  @tuple_len matches no supported address family
+ *   -ENOENT        no conntrack entry matches the tuple
+ */
+static struct nf_conn *bpf_ct_lookup(struct net *caller_net,
+				     struct bpf_sock_tuple *tuple,
+				     u32 tuple_len,
+				     u8 protonum,
+				     u64 netns_id,
+				     u64 flags)
+{
+	const bool other_netns = (s32)netns_id >= 0;
+	struct nf_conntrack_tuple_hash *hash;
+	struct nf_conntrack_tuple key = {};
+	struct net *lookup_net = caller_net;
+
+	/* BPF_F_CT_DIR_REPLY is the only flag understood here. */
+	if (flags & ~BPF_F_CT_DIR_REPLY)
+		return ERR_PTR(-EINVAL);
+
+	/* The size of the supplied tuple selects the address family. */
+	switch (tuple_len) {
+	case sizeof(tuple->ipv4):
+		key.src.l3num = AF_INET;
+		key.src.u3.ip = tuple->ipv4.saddr;
+		key.dst.u3.ip = tuple->ipv4.daddr;
+		key.src.u.tcp.port = tuple->ipv4.sport;
+		key.dst.u.tcp.port = tuple->ipv4.dport;
+		break;
+#if IS_ENABLED(CONFIG_IPV6)
+	case sizeof(tuple->ipv6):
+		key.src.l3num = AF_INET6;
+		memcpy(key.src.u3.ip6, tuple->ipv6.saddr,
+		       sizeof(tuple->ipv6.saddr));
+		memcpy(key.dst.u3.ip6, tuple->ipv6.daddr,
+		       sizeof(tuple->ipv6.daddr));
+		key.src.u.tcp.port = tuple->ipv6.sport;
+		key.dst.u.tcp.port = tuple->ipv6.dport;
+		break;
+#endif
+	default:
+		return ERR_PTR(-EAFNOSUPPORT);
+	}
+
+	key.dst.protonum = protonum;
+	if (flags & BPF_F_CT_DIR_REPLY)
+		key.dst.dir = IP_CT_DIR_REPLY;
+	else
+		key.dst.dir = IP_CT_DIR_ORIGINAL;
+
+	if (other_netns) {
+		if (unlikely(netns_id > S32_MAX))
+			return ERR_PTR(-EINVAL);
+		lookup_net = get_net_ns_by_id(caller_net, netns_id);
+		if (!lookup_net)
+			return ERR_PTR(-EINVAL);
+	}
+
+	hash = nf_conntrack_find_get(lookup_net, &nf_ct_zone_dflt, &key);
+
+	/* Only a namespace obtained via get_net_ns_by_id() is put back. */
+	if (other_netns)
+		put_net(lookup_net);
+
+	return hash ? nf_ct_tuplehash_to_ctrack(hash) : ERR_PTR(-ENOENT);
+}
+
+/* XDP flavour of bpf_ct_lookup_tcp: the lookup starts from the namespace of
+ * the receiving device. *flags_err carries the flags in and, when NULL is
+ * returned, the negative error code out.
+ */
+BPF_CALL_5(bpf_xdp_ct_lookup_tcp, struct xdp_buff *, ctx,
+	   struct bpf_sock_tuple *, tuple, u32, tuple_len,
+	   u64, netns_id, u64 *, flags_err)
+{
+	struct net *caller_net = dev_net(ctx->rxq->dev);
+	struct nf_conn *entry;
+
+	entry = bpf_ct_lookup(caller_net, tuple, tuple_len, IPPROTO_TCP,
+			      netns_id, *flags_err);
+	if (!IS_ERR(entry))
+		return (unsigned long)entry;
+
+	*flags_err = PTR_ERR(entry);
+	return (unsigned long)NULL;
+}
+
+/* Verifier-facing description of bpf_xdp_ct_lookup_tcp(). */
+static const struct bpf_func_proto bpf_xdp_ct_lookup_tcp_proto = {
+	.func		= bpf_xdp_ct_lookup_tcp,
+	.gpl_only	= true, /* nf_conntrack_find_get is GPL */
+	.pkt_access	= true,
+	.ret_type	= RET_PTR_TO_NF_CONN_OR_NULL,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_PTR_TO_MEM,
+	.arg3_type	= ARG_CONST_SIZE,
+	.arg4_type	= ARG_ANYTHING,
+	.arg5_type	= ARG_PTR_TO_LONG,
+};
+
+/* XDP flavour of bpf_ct_lookup_udp: the lookup starts from the namespace of
+ * the receiving device. *flags_err carries the flags in and, when NULL is
+ * returned, the negative error code out.
+ */
+BPF_CALL_5(bpf_xdp_ct_lookup_udp, struct xdp_buff *, ctx,
+	   struct bpf_sock_tuple *, tuple, u32, tuple_len,
+	   u64, netns_id, u64 *, flags_err)
+{
+	struct net *caller_net = dev_net(ctx->rxq->dev);
+	struct nf_conn *entry;
+
+	entry = bpf_ct_lookup(caller_net, tuple, tuple_len, IPPROTO_UDP,
+			      netns_id, *flags_err);
+	if (!IS_ERR(entry))
+		return (unsigned long)entry;
+
+	*flags_err = PTR_ERR(entry);
+	return (unsigned long)NULL;
+}
+
+/* Verifier-facing description of bpf_xdp_ct_lookup_udp(). */
+static const struct bpf_func_proto bpf_xdp_ct_lookup_udp_proto = {
+	.func		= bpf_xdp_ct_lookup_udp,
+	.gpl_only	= true, /* nf_conntrack_find_get is GPL */
+	.pkt_access	= true,
+	.ret_type	= RET_PTR_TO_NF_CONN_OR_NULL,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_PTR_TO_MEM,
+	.arg3_type	= ARG_CONST_SIZE,
+	.arg4_type	= ARG_ANYTHING,
+	.arg5_type	= ARG_PTR_TO_LONG,
+};
+
+/* skb flavour of bpf_ct_lookup_tcp: the lookup starts from the namespace of
+ * skb->dev when the skb has a device, otherwise from that of skb->sk.
+ * *flags_err carries the flags in and, when NULL is returned, the negative
+ * error code out.
+ */
+BPF_CALL_5(bpf_skb_ct_lookup_tcp, struct sk_buff *, skb,
+	   struct bpf_sock_tuple *, tuple, u32, tuple_len,
+	   u64, netns_id, u64 *, flags_err)
+{
+	struct net *caller_net;
+	struct nf_conn *entry;
+
+	if (skb->dev)
+		caller_net = dev_net(skb->dev);
+	else
+		caller_net = sock_net(skb->sk);
+
+	entry = bpf_ct_lookup(caller_net, tuple, tuple_len, IPPROTO_TCP,
+			      netns_id, *flags_err);
+	if (!IS_ERR(entry))
+		return (unsigned long)entry;
+
+	*flags_err = PTR_ERR(entry);
+	return (unsigned long)NULL;
+}
+
+/* Verifier-facing description of bpf_skb_ct_lookup_tcp(). */
+static const struct bpf_func_proto bpf_skb_ct_lookup_tcp_proto = {
+	.func		= bpf_skb_ct_lookup_tcp,
+	.gpl_only	= true, /* nf_conntrack_find_get is GPL */
+	.pkt_access	= true,
+	.ret_type	= RET_PTR_TO_NF_CONN_OR_NULL,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_PTR_TO_MEM,
+	.arg3_type	= ARG_CONST_SIZE,
+	.arg4_type	= ARG_ANYTHING,
+	.arg5_type	= ARG_PTR_TO_LONG,
+};
+
+/* skb flavour of bpf_ct_lookup_udp: the lookup starts from the namespace of
+ * skb->dev when the skb has a device, otherwise from that of skb->sk.
+ * *flags_err carries the flags in and, when NULL is returned, the negative
+ * error code out.
+ */
+BPF_CALL_5(bpf_skb_ct_lookup_udp, struct sk_buff *, skb,
+	   struct bpf_sock_tuple *, tuple, u32, tuple_len,
+	   u64, netns_id, u64 *, flags_err)
+{
+	struct net *caller_net;
+	struct nf_conn *entry;
+
+	if (skb->dev)
+		caller_net = dev_net(skb->dev);
+	else
+		caller_net = sock_net(skb->sk);
+
+	entry = bpf_ct_lookup(caller_net, tuple, tuple_len, IPPROTO_UDP,
+			      netns_id, *flags_err);
+	if (!IS_ERR(entry))
+		return (unsigned long)entry;
+
+	*flags_err = PTR_ERR(entry);
+	return (unsigned long)NULL;
+}
+
+/* Verifier-facing description of bpf_skb_ct_lookup_udp(). */
+static const struct bpf_func_proto bpf_skb_ct_lookup_udp_proto = {
+	.func		= bpf_skb_ct_lookup_udp,
+	.gpl_only	= true, /* nf_conntrack_find_get is GPL */
+	.pkt_access	= true,
+	.ret_type	= RET_PTR_TO_NF_CONN_OR_NULL,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_PTR_TO_MEM,
+	.arg3_type	= ARG_CONST_SIZE,
+	.arg4_type	= ARG_ANYTHING,
+	.arg5_type	= ARG_PTR_TO_LONG,
+};
+
+/* Hand a conntrack entry obtained from bpf_ct_lookup_{tcp,udp} back via
+ * nf_ct_put(). Always reports success (0).
+ */
+BPF_CALL_1(bpf_ct_release, struct nf_conn *, ct)
+{
+	nf_ct_put(ct);
+
+	return 0;
+}
+
+/* Verifier-facing description of bpf_ct_release(). */
+static const struct bpf_func_proto bpf_ct_release_proto = {
+	.func		= bpf_ct_release,
+	.gpl_only	= false,
+	.pkt_access	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_NF_CONN,
+};
+#endif
+
#endif /* CONFIG_INET */
bool bpf_helper_changes_pkt_data(void *func)
@@ -7455,6 +7644,14 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_tcp_gen_syncookie_proto;
case BPF_FUNC_sk_assign:
return &bpf_sk_assign_proto;
+#if IS_BUILTIN(CONFIG_NF_CONNTRACK)
+ case BPF_FUNC_ct_lookup_tcp:
+ return &bpf_skb_ct_lookup_tcp_proto;
+ case BPF_FUNC_ct_lookup_udp:
+ return &bpf_skb_ct_lookup_udp_proto;
+ case BPF_FUNC_ct_release:
+ return &bpf_ct_release_proto;
+#endif
#endif
default:
return bpf_sk_base_func_proto(func_id);
@@ -7498,6 +7695,14 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_tcp_check_syncookie_proto;
case BPF_FUNC_tcp_gen_syncookie:
return &bpf_tcp_gen_syncookie_proto;
+#if IS_BUILTIN(CONFIG_NF_CONNTRACK)
+ case BPF_FUNC_ct_lookup_tcp:
+ return &bpf_xdp_ct_lookup_tcp_proto;
+ case BPF_FUNC_ct_lookup_udp:
+ return &bpf_xdp_ct_lookup_udp_proto;
+ case BPF_FUNC_ct_release:
+ return &bpf_ct_release_proto;
+#endif
#endif
default:
return bpf_sk_base_func_proto(func_id);
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index a10a44c4f79b..883de3f1bb8b 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -4925,6 +4925,79 @@ union bpf_attr {
* Return
* The number of bytes written to the buffer, or a negative error
* in case of failure.
+ *
+ * struct bpf_nf_conn *bpf_ct_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 *flags_err)
+ * Description
+ * Look for conntrack info for a TCP connection matching *tuple*,
+ * optionally in a child network namespace *netns*.
+ *
+ * The *flags_err* argument is used as an input parameter for flags
+ * and output parameter for the error code. The flags can be a
+ * combination of one or more of the following values:
+ *
+ * **BPF_F_CT_DIR_REPLY**
+ * When set, the conntrack direction is IP_CT_DIR_REPLY,
+ * otherwise IP_CT_DIR_ORIGINAL.
+ *
+ * If the function returns **NULL**, *flags_err* is set to the
+ * negative value of one of these error codes:
+ *
+ * **EAFNOSUPPORT**
+ * *tuple_size* doesn't match supported address families
+ * (AF_INET; AF_INET6 when CONFIG_IPV6 is enabled).
+ *
+ * **EINVAL**
+ * Input arguments are not valid.
+ *
+ * **ENOENT**
+ * Connection tracking entry for *tuple* wasn't found.
+ *
+ * This helper is available only if the kernel was compiled with
+ * **CONFIG_NF_CONNTRACK** configuration option as built-in.
+ * Return
+ * A pointer to **struct bpf_nf_conn** for the matching conntrack
+ * entry, or **NULL** in case of failure or if there is no conntrack
+ * entry for this tuple. A non-**NULL** result must be released
+ * with **bpf_ct_release**\ ().
+ *
+ * struct bpf_nf_conn *bpf_ct_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 *flags_err)
+ * Description
+ * Look for conntrack info for a UDP connection matching *tuple*,
+ * optionally in a child network namespace *netns*.
+ *
+ * The *flags_err* argument is used as an input parameter for flags
+ * and output parameter for the error code. The flags can be a
+ * combination of one or more of the following values:
+ *
+ * **BPF_F_CT_DIR_REPLY**
+ * When set, the conntrack direction is IP_CT_DIR_REPLY,
+ * otherwise IP_CT_DIR_ORIGINAL.
+ *
+ * If the function returns **NULL**, *flags_err* is set to the
+ * negative value of one of these error codes:
+ *
+ * **EAFNOSUPPORT**
+ * *tuple_size* doesn't match supported address families
+ * (AF_INET; AF_INET6 when CONFIG_IPV6 is enabled).
+ *
+ * **EINVAL**
+ * Input arguments are not valid.
+ *
+ * **ENOENT**
+ * Connection tracking entry for *tuple* wasn't found.
+ *
+ * This helper is available only if the kernel was compiled with
+ * **CONFIG_NF_CONNTRACK** configuration option as built-in.
+ * Return
+ * A pointer to **struct bpf_nf_conn** for the matching conntrack
+ * entry, or **NULL** in case of failure or if there is no conntrack
+ * entry for this tuple. A non-**NULL** result must be released
+ * with **bpf_ct_release**\ ().
+ *
+ * long bpf_ct_release(void *ct)
+ * Description
+ * Release the reference held by *ct*. *ct* must be a non-**NULL**
+ * pointer that was returned from **bpf_ct_lookup_xxx**\ ().
+ * Return
+ * 0 on success, or a negative error in case of failure.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -5105,6 +5178,9 @@ union bpf_attr {
FN(task_pt_regs), \
FN(get_branch_snapshot), \
FN(trace_vprintk), \
+ FN(ct_lookup_tcp), \
+ FN(ct_lookup_udp), \
+ FN(ct_release), \
/* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
@@ -5288,6 +5364,11 @@ enum {
BPF_F_EXCLUDE_INGRESS = (1ULL << 4),
};
+/* Flags for bpf_ct_lookup_{tcp,udp} helpers, passed in through *flags_err*. */
+enum {
+ BPF_F_CT_DIR_REPLY = (1ULL << 0), /* match the IP_CT_DIR_REPLY tuple */
+};
+
#define __bpf_md_ptr(type, name) \
union { \
type name; \
--
2.30.2
Powered by blists - more mailing lists