[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20211217015031.1278167-8-memxor@gmail.com>
Date: Fri, 17 Dec 2021 07:20:28 +0530
From: Kumar Kartikeya Dwivedi <memxor@...il.com>
To: bpf@...r.kernel.org, netdev@...r.kernel.org,
netfilter-devel@...r.kernel.org
Cc: Alexei Starovoitov <ast@...nel.org>,
Daniel Borkmann <daniel@...earbox.net>,
Andrii Nakryiko <andrii@...nel.org>,
Martin KaFai Lau <kafai@...com>,
Song Liu <songliubraving@...com>, Yonghong Song <yhs@...com>,
John Fastabend <john.fastabend@...il.com>,
Maxim Mikityanskiy <maximmi@...dia.com>,
Pablo Neira Ayuso <pablo@...filter.org>,
Florian Westphal <fw@...len.de>,
Jesper Dangaard Brouer <brouer@...hat.com>,
Toke Høiland-Jørgensen <toke@...hat.com>
Subject: [PATCH bpf-next v4 07/10] net/netfilter: Add unstable CT lookup helpers for XDP and TC-BPF
This change adds conntrack lookup helpers using the unstable kfunc call
interface for the XDP and TC-BPF hooks. The primary usecase is
implementing a synproxy in XDP, see Maxim's patchset at [0].
Export get_net_ns_by_id as nf_conntrack needs to call it.
Note that we search for acquire, release, and null returning kfuncs in
the intersection of those sets and main set.
This implies that the kfunc_btf_id_list acq_set, rel_set, null_set may
contain BTF ID not in main set, this is explicitly allowed and
recommended (to save on definining more and more sets), since
check_kfunc_call verifier operation would filter out the invalid BTF ID
fairly early, so later checks for acquire, release, and ret_type_null
kfunc will only consider allowed BTF IDs for that program that are
allowed in main set. This is why the nf_conntrack_acq_ids set has BTF
IDs for both xdp and tc hook kfuncs.
[0]: https://lore.kernel.org/bpf/20211019144655.3483197-1-maximmi@nvidia.com
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@...il.com>
---
include/linux/btf.h | 2 +
kernel/bpf/btf.c | 1 +
net/core/filter.c | 24 +++
net/core/net_namespace.c | 1 +
net/netfilter/nf_conntrack_core.c | 278 ++++++++++++++++++++++++++++++
5 files changed, 306 insertions(+)
diff --git a/include/linux/btf.h b/include/linux/btf.h
index 64c3784799c5..289d9db6748b 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -363,6 +363,7 @@ bool bpf_is_mod_kfunc_ret_type_null(struct kfunc_btf_id_list *klist,
extern struct kfunc_btf_id_list bpf_tcp_ca_kfunc_list;
extern struct kfunc_btf_id_list prog_test_kfunc_list;
+extern struct kfunc_btf_id_list xdp_kfunc_list;
#else
static inline void register_kfunc_btf_id_set(struct kfunc_btf_id_list *l,
struct kfunc_btf_id_set *s)
@@ -396,6 +397,7 @@ bpf_is_mod_kfunc_ret_type_null(struct kfunc_btf_id_list *klist, u32 kfunc_id,
static struct kfunc_btf_id_list bpf_tcp_ca_kfunc_list __maybe_unused;
static struct kfunc_btf_id_list prog_test_kfunc_list __maybe_unused;
+static struct kfunc_btf_id_list xdp_kfunc_list __maybe_unused;
#endif
#endif
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 4983b54c1d81..bce1f98177b9 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -6558,6 +6558,7 @@ bool bpf_is_mod_kfunc_ret_type_null(struct kfunc_btf_id_list *klist,
DEFINE_KFUNC_BTF_ID_LIST(bpf_tcp_ca_kfunc_list);
DEFINE_KFUNC_BTF_ID_LIST(prog_test_kfunc_list);
+DEFINE_KFUNC_BTF_ID_LIST(xdp_kfunc_list);
#endif
diff --git a/net/core/filter.c b/net/core/filter.c
index 3f656391af7e..e5efacaa6175 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -10008,11 +10008,35 @@ const struct bpf_prog_ops tc_cls_act_prog_ops = {
.test_run = bpf_prog_test_run_skb,
};
+static bool xdp_check_kfunc_call(u32 kfunc_id, struct module *owner)
+{
+ return bpf_check_mod_kfunc_call(&xdp_kfunc_list, kfunc_id, owner);
+}
+
+static bool xdp_is_acquire_kfunc(u32 kfunc_id, struct module *owner)
+{
+ return bpf_is_mod_acquire_kfunc(&xdp_kfunc_list, kfunc_id, owner);
+}
+
+static bool xdp_is_release_kfunc(u32 kfunc_id, struct module *owner)
+{
+ return bpf_is_mod_release_kfunc(&xdp_kfunc_list, kfunc_id, owner);
+}
+
+static bool xdp_is_kfunc_ret_type_null(u32 kfunc_id, struct module *owner)
+{
+ return bpf_is_mod_kfunc_ret_type_null(&xdp_kfunc_list, kfunc_id, owner);
+}
+
const struct bpf_verifier_ops xdp_verifier_ops = {
.get_func_proto = xdp_func_proto,
.is_valid_access = xdp_is_valid_access,
.convert_ctx_access = xdp_convert_ctx_access,
.gen_prologue = bpf_noop_prologue,
+ .check_kfunc_call = xdp_check_kfunc_call,
+ .is_acquire_kfunc = xdp_is_acquire_kfunc,
+ .is_release_kfunc = xdp_is_release_kfunc,
+ .is_kfunc_ret_type_null = xdp_is_kfunc_ret_type_null,
};
const struct bpf_prog_ops xdp_prog_ops = {
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 9b7171c40434..3b471781327f 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -299,6 +299,7 @@ struct net *get_net_ns_by_id(const struct net *net, int id)
return peer;
}
+EXPORT_SYMBOL_GPL(get_net_ns_by_id);
/*
* setup_net runs the initializers for the network namespace object.
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 9fbce31baf75..116e1384e446 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -11,6 +11,9 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/bpf.h>
+#include <linux/btf.h>
+#include <linux/btf_ids.h>
#include <linux/types.h>
#include <linux/netfilter.h>
#include <linux/module.h>
@@ -2457,8 +2460,280 @@ void nf_conntrack_cleanup_start(void)
RCU_INIT_POINTER(ip_ct_attach, NULL);
}
+#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
+
+/* Unstable Conntrack Helpers for XDP and TC-BPF hook
+ *
+ * These are called from the XDP and SCHED_CLS BPF programs. Note that it is
+ * allowed to break compatibility for these functions since the interface they
+ * are exposed through to BPF programs is explicitly unstable.
+ */
+
+/* bpf_ct_opts - Options for CT lookup helpers
+ *
+ * Members:
+ * @error - Out parameter, set for any errors encountered
+ * Values:
+ * -EINVAL - Passed NULL for bpf_tuple pointer
+ * -EINVAL - opts->reserved is not 0
+ * -EINVAL - netns_id is less than -1
+ * -EINVAL - len__opts isn't NF_BPF_CT_OPTS_SZ (12)
+ * -EPROTO - l4proto isn't one of IPPROTO_TCP or IPPROTO_UDP
+ * -ENONET - No network namespace found for netns_id
+ * -ENOENT - Conntrack lookup could not find entry for tuple
+ * -EAFNOSUPPORT - len__tuple isn't one of sizeof(tuple->ipv4)
+ * or sizeof(tuple->ipv6)
+ * @l4proto - Layer 4 protocol
+ * Values:
+ * IPPROTO_TCP, IPPROTO_UDP
+ * @reserved - Reserved member, will be reused for more options in future
+ * Values:
+ * 0
+ * @netns_id - Specify the network namespace for lookup
+ * Values:
+ * BPF_F_CURRENT_NETNS (-1)
+ * Use namespace associated with ctx (xdp_md, __sk_buff)
+ * [0, S32_MAX]
+ * Network Namespace ID
+ */
+struct bpf_ct_opts {
+ s32 netns_id;
+ s32 error;
+ u8 l4proto;
+ u8 reserved[3];
+};
+
+enum {
+ NF_BPF_CT_OPTS_SZ = 12,
+};
+
+static struct nf_conn *__bpf_nf_ct_lookup(struct net *net,
+ struct bpf_sock_tuple *bpf_tuple,
+ u32 tuple_len, u8 protonum,
+ s32 netns_id)
+{
+ struct nf_conntrack_tuple_hash *hash;
+ struct nf_conntrack_tuple tuple;
+
+ if (unlikely(protonum != IPPROTO_TCP && protonum != IPPROTO_UDP))
+ return ERR_PTR(-EPROTO);
+ if (unlikely(netns_id < BPF_F_CURRENT_NETNS))
+ return ERR_PTR(-EINVAL);
+
+ memset(&tuple, 0, sizeof(tuple));
+ switch (tuple_len) {
+ case sizeof(bpf_tuple->ipv4):
+ tuple.src.l3num = AF_INET;
+ tuple.src.u3.ip = bpf_tuple->ipv4.saddr;
+ tuple.src.u.tcp.port = bpf_tuple->ipv4.sport;
+ tuple.dst.u3.ip = bpf_tuple->ipv4.daddr;
+ tuple.dst.u.tcp.port = bpf_tuple->ipv4.dport;
+ break;
+ case sizeof(bpf_tuple->ipv6):
+ tuple.src.l3num = AF_INET6;
+ memcpy(tuple.src.u3.ip6, bpf_tuple->ipv6.saddr, sizeof(bpf_tuple->ipv6.saddr));
+ tuple.src.u.tcp.port = bpf_tuple->ipv6.sport;
+ memcpy(tuple.dst.u3.ip6, bpf_tuple->ipv6.daddr, sizeof(bpf_tuple->ipv6.daddr));
+ tuple.dst.u.tcp.port = bpf_tuple->ipv6.dport;
+ break;
+ default:
+ return ERR_PTR(-EAFNOSUPPORT);
+ }
+
+ tuple.dst.protonum = protonum;
+
+ if (netns_id >= 0) {
+ net = get_net_ns_by_id(net, netns_id);
+ if (unlikely(!net))
+ return ERR_PTR(-ENONET);
+ }
+
+ hash = nf_conntrack_find_get(net, &nf_ct_zone_dflt, &tuple);
+ if (netns_id >= 0)
+ put_net(net);
+ if (!hash)
+ return ERR_PTR(-ENOENT);
+ return nf_ct_tuplehash_to_ctrack(hash);
+}
+
+__diag_push();
+__diag_ignore(GCC, 8, "-Wmissing-prototypes",
+ "Global functions as their definitions will be in nf_conntrack BTF");
+
+/* bpf_xdp_ct_lookup - Lookup CT entry for the given tuple, and acquire a
+ * reference to it
+ *
+ * Parameters:
+ * @xdp_ctx - Pointer to ctx (xdp_md) in XDP program
+ * Cannot be NULL
+ * @bpf_tuple - Pointer to memory representing the tuple to look up
+ * Cannot be NULL
+ * @len__tuple - Length of the tuple structure
+ * Must be one of sizeof(bpf_tuple->ipv4) or
+ * sizeof(bpf_tuple->ipv6)
+ * @opts - Additional options for lookup (documented above)
+ * Cannot be NULL
+ * @len__opts - Length of the bpf_ct_opts structure
+ * Must be NF_BPF_CT_OPTS_SZ (12)
+ */
+struct nf_conn *
+bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
+ u32 len__tuple, struct bpf_ct_opts *opts, u32 len__opts)
+{
+ struct xdp_buff *ctx = (struct xdp_buff *)xdp_ctx;
+ struct net *caller_net;
+ struct nf_conn *nfct;
+
+ BUILD_BUG_ON(sizeof(struct bpf_ct_opts) != NF_BPF_CT_OPTS_SZ);
+
+ if (!opts)
+ return NULL;
+ if (!bpf_tuple || opts->reserved[0] || opts->reserved[1] ||
+ opts->reserved[2] || len__opts != NF_BPF_CT_OPTS_SZ) {
+ opts->error = -EINVAL;
+ return NULL;
+ }
+ caller_net = dev_net(ctx->rxq->dev);
+ nfct = __bpf_nf_ct_lookup(caller_net, bpf_tuple, len__tuple, opts->l4proto,
+ opts->netns_id);
+ if (IS_ERR(nfct)) {
+ opts->error = PTR_ERR(nfct);
+ return NULL;
+ }
+ return nfct;
+}
+
+/* bpf_skb_ct_lookup - Lookup CT entry for the given tuple, and acquire a
+ * reference to it
+ *
+ * Parameters:
+ * @skb_ctx - Pointer to ctx (__sk_buff) in TC program
+ * Cannot be NULL
+ * @bpf_tuple - Pointer to memory representing the tuple to look up
+ * Cannot be NULL
+ * @len__tuple - Length of the tuple structure
+ * Must be one of sizeof(bpf_tuple->ipv4) or
+ * sizeof(bpf_tuple->ipv6)
+ * @opts - Additional options for lookup (documented above)
+ * Cannot be NULL
+ * @len__opts - Length of the bpf_ct_opts structure
+ * Must be NF_BPF_CT_OPTS_SZ (12)
+ */
+struct nf_conn *
+bpf_skb_ct_lookup(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,
+ u32 len__tuple, struct bpf_ct_opts *opts, u32 len__opts)
+{
+ struct sk_buff *skb = (struct sk_buff *)skb_ctx;
+ struct net *caller_net;
+ struct nf_conn *nfct;
+
+ BUILD_BUG_ON(sizeof(struct bpf_ct_opts) != NF_BPF_CT_OPTS_SZ);
+
+ if (!opts)
+ return NULL;
+ if (!bpf_tuple || opts->reserved[0] || opts->reserved[1] ||
+ opts->reserved[2] || len__opts != NF_BPF_CT_OPTS_SZ) {
+ opts->error = -EINVAL;
+ return NULL;
+ }
+ caller_net = skb->dev ? dev_net(skb->dev) : sock_net(skb->sk);
+ nfct = __bpf_nf_ct_lookup(caller_net, bpf_tuple, len__tuple, opts->l4proto,
+ opts->netns_id);
+ if (IS_ERR(nfct)) {
+ opts->error = PTR_ERR(nfct);
+ return NULL;
+ }
+ return nfct;
+}
+
+/* bpf_ct_release - Release acquired nf_conn object
+ *
+ * This must be invoked for referenced PTR_TO_BTF_ID, and the verifier rejects
+ * the program if any references remain in the program in all of the explored
+ * states.
+ *
+ * Parameters:
+ * @nf_conn - Pointer to referenced nf_conn object, obtained using
+ * bpf_xdp_ct_lookup or bpf_skb_ct_lookup.
+ */
+void bpf_ct_release(struct nf_conn *nfct)
+{
+ if (!nfct)
+ return;
+ nf_ct_put(nfct);
+}
+
+__diag_pop()
+
+/* XDP hook allowed kfuncs */
+BTF_SET_START(bpf_nf_ct_xdp_ids)
+BTF_ID(func, bpf_xdp_ct_lookup)
+BTF_ID(func, bpf_ct_release)
+BTF_SET_END(bpf_nf_ct_xdp_ids)
+
+/* TC-BPF hook allowed kfuncs */
+BTF_SET_START(bpf_nf_ct_skb_ids)
+BTF_ID(func, bpf_skb_ct_lookup)
+BTF_ID(func, bpf_ct_release)
+BTF_SET_END(bpf_nf_ct_skb_ids)
+
+/* XDP and TC-BPF hook acquire kfuncs */
+BTF_SET_START(bpf_nf_ct_acq_ids)
+BTF_ID(func, bpf_xdp_ct_lookup)
+BTF_ID(func, bpf_skb_ct_lookup)
+BTF_SET_END(bpf_nf_ct_acq_ids)
+
+/* XDP and TC-BPF hook release kfuncs */
+BTF_SET_START(bpf_nf_ct_rel_ids)
+BTF_ID(func, bpf_ct_release)
+BTF_SET_END(bpf_nf_ct_rel_ids)
+
+/* kfuncs that may return NULL PTR_TO_BTF_ID */
+BTF_SET_START(bpf_nf_ct_null_ids)
+BTF_ID(func, bpf_xdp_ct_lookup)
+BTF_ID(func, bpf_skb_ct_lookup)
+BTF_SET_END(bpf_nf_ct_null_ids)
+
+#else
+
+BTF_SET_START(bpf_nf_ct_xdp_ids)
+BTF_SET_END(bpf_nf_ct_xdp_ids)
+
+BTF_SET_START(bpf_nf_ct_skb_ids)
+BTF_SET_END(bpf_nf_ct_skb_ids)
+
+BTF_SET_START(bpf_nf_ct_acq_ids)
+BTF_SET_END(bpf_nf_ct_acq_ids)
+
+BTF_SET_START(bpf_nf_ct_rel_ids)
+BTF_SET_END(bpf_nf_ct_rel_ids)
+
+BTF_SET_START(bpf_nf_ct_null_ids)
+BTF_SET_END(bpf_nf_ct_null_ids)
+
+#endif /* CONFIG_DEBUG_INFO_BTF_MODULES */
+
+static struct kfunc_btf_id_set nf_ct_xdp_kfunc_set = {
+ .owner = THIS_MODULE,
+ .set = &bpf_nf_ct_xdp_ids,
+ .acq_set = &bpf_nf_ct_acq_ids,
+ .rel_set = &bpf_nf_ct_rel_ids,
+ .null_set = &bpf_nf_ct_null_ids,
+};
+
+static struct kfunc_btf_id_set nf_ct_skb_kfunc_set = {
+ .owner = THIS_MODULE,
+ .set = &bpf_nf_ct_skb_ids,
+ .acq_set = &bpf_nf_ct_acq_ids,
+ .rel_set = &bpf_nf_ct_rel_ids,
+ .null_set = &bpf_nf_ct_null_ids,
+};
+
void nf_conntrack_cleanup_end(void)
{
+ unregister_kfunc_btf_id_set(&xdp_kfunc_list, &nf_ct_xdp_kfunc_set);
+ unregister_kfunc_btf_id_set(&prog_test_kfunc_list, &nf_ct_skb_kfunc_set);
+
RCU_INIT_POINTER(nf_ct_hook, NULL);
cancel_delayed_work_sync(&conntrack_gc_work.dwork);
kvfree(nf_conntrack_hash);
@@ -2745,6 +3020,9 @@ int nf_conntrack_init_start(void)
conntrack_gc_work_init(&conntrack_gc_work);
queue_delayed_work(system_power_efficient_wq, &conntrack_gc_work.dwork, HZ);
+ register_kfunc_btf_id_set(&prog_test_kfunc_list, &nf_ct_skb_kfunc_set);
+ register_kfunc_btf_id_set(&xdp_kfunc_list, &nf_ct_xdp_kfunc_set);
+
return 0;
err_proto:
--
2.34.1
Powered by blists - more mailing lists