[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20180304194044.26751-3-fw@strlen.de>
Date: Sun, 4 Mar 2018 20:40:43 +0100
From: Florian Westphal <fw@...len.de>
To: <netdev@...r.kernel.org>
Cc: daniel@...earbox.net, ast@...nel.org, pablo@...filter.org,
Florian Westphal <fw@...len.de>
Subject: [RFC,POC 2/3] bpfilter: add nftables jit proof-of-concept
This adds a nftables frontend for the IMR->BPF translator.
This doesn't work via UMH yet.
AFAIU it should be possible to get transparent ebpf translation for
nftables, similar to the bpfilter/iptables UMH.
However, at this time I think it's better to get IMR "right".
nftjit.ko currently needs libnftnl/libmnl, but that's a convenience on
my end and not a "must have".
Signed-off-by: Florian Westphal <fw@...len.de>
---
net/bpfilter/Makefile | 7 +-
net/bpfilter/nftables.c | 679 ++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 685 insertions(+), 1 deletion(-)
create mode 100644 net/bpfilter/nftables.c
diff --git a/net/bpfilter/Makefile b/net/bpfilter/Makefile
index 5a85ef7d7a4d..a4064986dc2f 100644
--- a/net/bpfilter/Makefile
+++ b/net/bpfilter/Makefile
@@ -3,7 +3,12 @@
# Makefile for the Linux BPFILTER layer.
#
-hostprogs-y := bpfilter.ko
+hostprogs-y := nftjit.ko bpfilter.ko
always := $(hostprogs-y)
bpfilter.ko-objs := bpfilter.o tgts.o targets.o tables.o init.o ctor.o sockopt.o gen.o
+
+NFT_LIBS = -lnftnl
+nftjit.ko-objs := tgts.o targets.o tables.o init.o ctor.o gen.o nftables.o imr.o
+HOSTLOADLIBES_nftjit.ko = `pkg-config --libs libnftnl libmnl`
+
HOSTCFLAGS += -I. -Itools/include/
diff --git a/net/bpfilter/nftables.c b/net/bpfilter/nftables.c
new file mode 100644
index 000000000000..5a756ccd03a1
--- /dev/null
+++ b/net/bpfilter/nftables.c
@@ -0,0 +1,679 @@
+/*
+ * based on previous code from:
+ *
+ * Copyright (c) 2013 Arturo Borrero Gonzalez <arturo@...filter.org>
+ * Copyright (c) 2013 Pablo Neira Ayuso <pablo@...filter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <stdlib.h>
+#include <stdint.h>
+#include <time.h>
+#include <string.h>
+#include <netinet/in.h>
+#include <errno.h>
+#include <sys/syscall.h>
+#include <utils.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+
+#include <libmnl/libmnl.h>
+#include <libnftnl/common.h>
+#include <libnftnl/ruleset.h>
+#include <libnftnl/table.h>
+#include <libnftnl/chain.h>
+#include <libnftnl/set.h>
+#include <libnftnl/expr.h>
+#include <libnftnl/rule.h>
+
+#include <linux/if_ether.h>
+
+#include "bpfilter_mod.h"
+#include "imr.h"
+
+/* Hack, we don't link bpfilter.o */
+extern long int syscall (long int __sysno, ...);
+
+/*
+ * Thin wrapper around the bpf(2) system call.
+ *
+ * @cmd:  bpf command
+ * @attr: command specific attributes
+ * @size: size of @attr in bytes
+ *
+ * Returns whatever syscall() returns.
+ *
+ * The syscall number comes from __NR_bpf instead of a literal: the number
+ * differs between architectures, so a hard-coded value only works on the
+ * one it was copied from.
+ */
+int sys_bpf(int cmd, union bpf_attr *attr, unsigned int size)
+{
+	return syscall(__NR_bpf, cmd, attr, size);
+}
+
+/* Netlink sequence number used for every request; initialised in main(). */
+static int seq;
+
+/* Report an allocation failure through perror() and exit with status 1. */
+static void memory_allocation_error(void)
+{
+	perror("allocation failed");
+	exit(1);
+}
+
+/*
+ * Map an nftables register number to the IMR register used for it.
+ *
+ * Returns the IMR register, or -1 for registers without a mapping
+ * (this includes NFT_REG_4).
+ */
+static int nft_reg_to_imr_reg(int nfreg)
+{
+	int imr_reg = -1;
+
+	switch (nfreg) {
+	case NFT_REG_VERDICT:
+		imr_reg = IMR_REG_0;
+		break;
+	/*
+	 * Old register numbers (4 128 bit registers) and new register
+	 * numbers (16 32 bit registers) that share an IMR register are
+	 * grouped together.
+	 */
+	case NFT_REG_1:
+	case NFT_REG32_00:
+		imr_reg = IMR_REG_4;
+		break;
+	case NFT_REG32_01:
+		imr_reg = IMR_REG_5;
+		break;
+	case NFT_REG_2:
+	case NFT_REG32_02:
+		imr_reg = IMR_REG_6;
+		break;
+	case NFT_REG_3:
+		imr_reg = IMR_REG_8;
+		break;
+	case NFT_REG_4:
+	default:
+		/* no mapping */
+		break;
+	}
+
+	return imr_reg;
+}
+
+/*
+ * Translate an nftables "immediate" expression.
+ *
+ * @nle: expression to translate
+ * @out: struct imr_state the result is added to
+ *
+ * Two forms are handled:
+ *  - immediate data of 4 or 8 bytes, which is stored in the IMR register
+ *    that corresponds to the destination register of the expression;
+ *  - a verdict without a chain, which is appended to the state as an
+ *    IMR verdict object.  Verdicts other than NF_ACCEPT/NF_DROP are
+ *    reported on stderr and turned into a drop.
+ *
+ * Returns 0 on success, -ENOMEM if an IMR object could not be allocated,
+ * -ENOTSUPP for forms that are not handled, or the negative value returned
+ * by the register mapping / imr_state_add_obj().
+ */
+static int netlink_parse_immediate(const struct nftnl_expr *nle, void *out)
+{
+	struct imr_state *state = out;
+	struct imr_object *o = NULL;
+
+	if (nftnl_expr_is_set(nle, NFTNL_EXPR_IMM_DATA)) {
+		uint32_t len = 0;
+		int reg;
+
+		/* only the length of the immediate is needed here */
+		nftnl_expr_get(nle, NFTNL_EXPR_IMM_DATA, &len);
+
+		switch (len) {
+		case sizeof(uint32_t):
+			o = imr_object_alloc_imm32(nftnl_expr_get_u32(nle, NFTNL_EXPR_IMM_DATA));
+			break;
+		case sizeof(uint64_t):
+			o = imr_object_alloc_imm64(nftnl_expr_get_u64(nle, NFTNL_EXPR_IMM_DATA));
+			break;
+		default:
+			return -ENOTSUPP;
+		}
+
+		/* same check netlink_parse_cmp() does for its immediate */
+		if (!o)
+			return -ENOMEM;
+
+		reg = nft_reg_to_imr_reg(nftnl_expr_get_u32(nle,
+							   NFTNL_EXPR_IMM_DREG));
+		if (reg < 0) {
+			imr_object_free(o);
+			return reg;
+		}
+
+		imr_register_store(state, reg, o);
+		return 0;
+	} else if (nftnl_expr_is_set(nle, NFTNL_EXPR_IMM_VERDICT)) {
+		uint32_t verdict;
+		int ret;
+
+		/* jumps/gotos to other chains are not translated */
+		if (nftnl_expr_is_set(nle, NFTNL_EXPR_IMM_CHAIN))
+			return -ENOTSUPP;
+
+		verdict = nftnl_expr_get_u32(nle, NFTNL_EXPR_IMM_VERDICT);
+
+		switch (verdict) {
+		case NF_ACCEPT:
+			o = imr_object_alloc_verdict(IMR_VERDICT_PASS);
+			break;
+		case NF_DROP:
+			o = imr_object_alloc_verdict(IMR_VERDICT_DROP);
+			break;
+		default:
+			fprintf(stderr, "Unhandled verdict %d\n", verdict);
+			o = imr_object_alloc_verdict(IMR_VERDICT_DROP);
+			break;
+		}
+
+		if (!o)
+			return -ENOMEM;
+
+		ret = imr_state_add_obj(state, o);
+		if (ret < 0)
+			imr_object_free(o);
+
+		return ret;
+	}
+
+	return -ENOTSUPP;
+}
+
+/*
+ * Translate an nftables "cmp" expression into an IMR relational object.
+ *
+ * @nle: expression to translate
+ * @out: struct imr_state the result is added to
+ *
+ * Only NFT_CMP_EQ and NFT_CMP_NEQ with 1, 2, 4 or 8 bytes of compare data
+ * are handled.  The left-hand side is loaded from the IMR register that
+ * corresponds to the source register of the expression.
+ *
+ * Returns 0 on success, -ENOTSUPP for unhandled operators/lengths,
+ * -ENOMEM on allocation failure, -EINVAL if the source register holds
+ * nothing, or the negative value returned by the register mapping /
+ * imr_state_add_obj().
+ */
+static int netlink_parse_cmp(const struct nftnl_expr *nle, void *out)
+{
+	struct imr_object *o, *imm, *left;
+	struct imr_state *state = out;
+	enum imr_relop op;
+	uint32_t len = 0;
+	int ret;
+
+	/* switch on the nftables value directly, op only ever holds IMR values */
+	switch (nftnl_expr_get_u32(nle, NFTNL_EXPR_CMP_OP)) {
+	case NFT_CMP_EQ:
+		op = IMR_RELOP_EQ;
+		break;
+	case NFT_CMP_NEQ:
+		op = IMR_RELOP_NE;
+		break;
+	default:
+		return -ENOTSUPP;
+	}
+
+	/* len stays 0 (-> -ENOTSUPP below) if no compare data is attached */
+	nftnl_expr_get(nle, NFTNL_EXPR_CMP_DATA, &len);
+	switch (len) {
+	case sizeof(uint64_t):
+		imm = imr_object_alloc_imm64(nftnl_expr_get_u64(nle, NFTNL_EXPR_CMP_DATA));
+		break;
+	case sizeof(uint32_t):
+		imm = imr_object_alloc_imm32(nftnl_expr_get_u32(nle, NFTNL_EXPR_CMP_DATA));
+		break;
+	case sizeof(uint16_t):
+		imm = imr_object_alloc_imm32(nftnl_expr_get_u16(nle, NFTNL_EXPR_CMP_DATA));
+		break;
+	case sizeof(uint8_t):
+		imm = imr_object_alloc_imm32(nftnl_expr_get_u8(nle, NFTNL_EXPR_CMP_DATA));
+		break;
+	default:
+		return -ENOTSUPP;
+	}
+
+	if (!imm)
+		return -ENOMEM;
+
+	ret = nft_reg_to_imr_reg(nftnl_expr_get_u32(nle, NFTNL_EXPR_CMP_SREG));
+	if (ret < 0) {
+		imr_object_free(imm);
+		return ret;
+	}
+
+	left = imr_register_load(state, ret);
+	if (!left) {
+		imr_object_free(imm);
+		return -EINVAL;
+	}
+
+	o = imr_object_alloc_relational(op, left, imm);
+	if (!o) {
+		/*
+		 * NOTE(review): imm is ours to free.  Whether 'left' must be
+		 * released as well depends on the ownership rules of
+		 * imr_register_load(), which are not visible here - confirm.
+		 */
+		imr_object_free(imm);
+		return -ENOMEM;
+	}
+
+	ret = imr_state_add_obj(state, o);
+	if (ret < 0)
+		imr_object_free(o);	/* as netlink_parse_immediate() does */
+
+	return ret;
+}
+
+/*
+ * Translate an nftables "payload" load into an IMR payload object and
+ * store it in the IMR register matching the destination register.
+ *
+ * Expressions that carry a source register or flags are rejected with
+ * -EOPNOTSUPP, an unknown header base with -EINVAL and an allocation
+ * failure with -ENOMEM.  Returns 0 on success.
+ */
+static int netlink_parse_payload(const struct nftnl_expr *nle, void *out)
+{
+	struct imr_state *st = out;
+	uint32_t nft_base, off, nbytes;
+	enum imr_payload_base hdr;
+	struct imr_object *obj;
+	int dreg;
+
+	if (nftnl_expr_is_set(nle, NFTNL_EXPR_PAYLOAD_SREG))
+		return -EOPNOTSUPP;
+	if (nftnl_expr_is_set(nle, NFTNL_EXPR_PAYLOAD_FLAGS))
+		return -EOPNOTSUPP;
+
+	nft_base = nftnl_expr_get_u32(nle, NFTNL_EXPR_PAYLOAD_BASE);
+	off = nftnl_expr_get_u32(nle, NFTNL_EXPR_PAYLOAD_OFFSET);
+	nbytes = nftnl_expr_get_u32(nle, NFTNL_EXPR_PAYLOAD_LEN);
+
+	printf("payload: base %d off %d len %d\n", nft_base, off, nbytes);
+
+	dreg = nft_reg_to_imr_reg(nftnl_expr_get_u32(nle, NFTNL_EXPR_PAYLOAD_DREG));
+	if (dreg < 0)
+		return dreg;
+
+	if (nft_base == NFT_PAYLOAD_LL_HEADER)
+		hdr = IMR_PAYLOAD_BASE_LL;
+	else if (nft_base == NFT_PAYLOAD_NETWORK_HEADER)
+		hdr = IMR_PAYLOAD_BASE_NH;
+	else if (nft_base == NFT_PAYLOAD_TRANSPORT_HEADER)
+		hdr = IMR_PAYLOAD_BASE_TH;
+	else
+		return -EINVAL;
+
+	obj = imr_object_alloc_payload(hdr, off, nbytes);
+	if (obj == NULL)
+		return -ENOMEM;
+
+	imr_register_store(st, dreg, obj);
+	return 0;
+}
+
+/* Expression name -> parser lookup table, searched by expr_parse_cb(). */
+static const struct {
+	const char *name;
+	int (*parse)(const struct nftnl_expr *nle, void *out);
+} netlink_parsers[] = {
+	{
+		.name	= "immediate",
+		.parse	= netlink_parse_immediate,
+	},
+	{
+		.name	= "cmp",
+		.parse	= netlink_parse_cmp,
+	},
+	{
+		.name	= "payload",
+		.parse	= netlink_parse_payload,
+	},
+};
+
+/*
+ * nftnl_expr_foreach() callback: hand one expression to its parser.
+ *
+ * @expr: expression to translate
+ * @data: struct imr_state passed through to the parser
+ *
+ * Returns -1 if the expression has no name, the parser's negative return
+ * value if translation failed, and 0 otherwise.  A parser failure is no
+ * longer dropped on the floor: continuing with the following expressions
+ * after one of them failed would produce a rule that is missing part of
+ * its match.  Expressions without a parser are still skipped, but this is
+ * now reported on stderr.
+ */
+static int expr_parse_cb(struct nftnl_expr *expr, void *data)
+{
+	const char *name = nftnl_expr_get_str(expr, NFTNL_EXPR_NAME);
+	struct imr_state *state = data;
+	unsigned int i;
+
+	if (!name)
+		return -1;
+
+	for (i = 0; i < ARRAY_SIZE(netlink_parsers); i++) {
+		int ret;
+
+		if (strcmp(netlink_parsers[i].name, name))
+			continue;
+
+		printf("parse: %s\n", name);
+		ret = netlink_parsers[i].parse(expr, state);
+		if (ret < 0) {
+			fprintf(stderr, "cannot translate \"%s\" expression: %d\n",
+				name, ret);
+			return ret;
+		}
+
+		return 0;
+	}
+
+	fprintf(stderr, "no parser for \"%s\" expression, skipped\n", name);
+	return 0;
+}
+
+/*
+ * nftnl_rule_list_foreach() callback: translate one rule.
+ *
+ * @rule: rule whose expressions are translated
+ * @data: struct imr_state the rule is added to
+ *
+ * Opens a rule in the IMR state, feeds every expression of @rule to
+ * expr_parse_cb() and closes the rule again.  Returns the first negative
+ * value reported by any of these steps, otherwise the result of
+ * imr_state_rule_end().
+ */
+static int rule_parse_cb(struct nftnl_rule *rule, void *data)
+{
+	struct imr_state *state = data;
+	int ret;
+
+	ret = imr_state_rule_begin(state);
+	if (ret < 0)
+		return ret;
+
+	/* do not finish a rule whose expressions could not be translated */
+	ret = nftnl_expr_foreach(rule, expr_parse_cb, data);
+	if (ret < 0)
+		return ret;
+
+	return imr_state_rule_end(state);
+}
+
+/*
+ * Send one netlink request and run @cb over the replies.
+ *
+ * Replies are read and passed to mnl_cb_run() until either the receive
+ * or the callback run yields a value <= 0.  A negative result with errno
+ * set to EAGAIN is reported as 0; otherwise the last result is returned.
+ * Returns -1 if the request could not be sent.
+ */
+static int
+mnl_talk(struct mnl_socket *nf_sock, const void *data, unsigned int len,
+	 int (*cb)(const struct nlmsghdr *nlh, void *data), void *cb_data)
+{
+	char buf[MNL_SOCKET_BUFFER_SIZE];
+	uint32_t portid = mnl_socket_get_portid(nf_sock);
+	int ret;
+
+	if (mnl_socket_sendto(nf_sock, data, len) < 0)
+		return -1;
+
+	for (;;) {
+		ret = mnl_socket_recvfrom(nf_sock, buf, sizeof(buf));
+		if (ret <= 0)
+			break;
+
+		ret = mnl_cb_run(buf, ret, seq, portid, cb, cb_data);
+		if (ret <= 0)
+			break;
+	}
+
+	if (ret < 0 && errno == EAGAIN)
+		return 0;
+
+	return ret;
+}
+
+/*
+ * Rule
+ */
+/*
+ * mnl_cb_run() callback: parse one rule message and append the rule to
+ * the nftnl_rule_list passed in @data.  Messages that fail to parse are
+ * dropped; MNL_CB_OK is returned either way.
+ */
+static int rule_cb(const struct nlmsghdr *nlh, void *data)
+{
+	struct nftnl_rule_list *rules = data;
+	struct nftnl_rule *rule = nftnl_rule_alloc();
+
+	if (!rule)
+		memory_allocation_error();
+
+	if (nftnl_rule_nlmsg_parse(nlh, rule) >= 0)
+		nftnl_rule_list_add_tail(rule, rules);
+	else
+		nftnl_rule_free(rule);
+
+	return MNL_CB_OK;
+}
+
+/*
+ * Request a dump of all rules of @family and collect them in a newly
+ * allocated list.  Returns the list, or NULL if mnl_talk() failed.
+ */
+static struct nftnl_rule_list *mnl_rule_dump(struct mnl_socket *nf_sock,
+					     int family)
+{
+	char buf[MNL_SOCKET_BUFFER_SIZE];
+	struct nftnl_rule_list *rules;
+	struct nlmsghdr *hdr;
+
+	rules = nftnl_rule_list_alloc();
+	if (!rules)
+		memory_allocation_error();
+
+	hdr = nftnl_rule_nlmsg_build_hdr(buf, NFT_MSG_GETRULE, family,
+					 NLM_F_DUMP, seq);
+
+	if (mnl_talk(nf_sock, hdr, hdr->nlmsg_len, rule_cb, rules) >= 0)
+		return rules;
+
+	nftnl_rule_list_free(rules);
+	return NULL;
+}
+
+/*
+ * Chain
+ */
+/*
+ * mnl_cb_run() callback: parse one chain message and append the chain to
+ * the nftnl_chain_list passed in @data.  Messages that fail to parse are
+ * dropped; MNL_CB_OK is returned either way.
+ */
+static int chain_cb(const struct nlmsghdr *nlh, void *data)
+{
+	struct nftnl_chain_list *chains = data;
+	struct nftnl_chain *chain = nftnl_chain_alloc();
+
+	if (!chain)
+		memory_allocation_error();
+
+	if (nftnl_chain_nlmsg_parse(nlh, chain) >= 0)
+		nftnl_chain_list_add_tail(chain, chains);
+	else
+		nftnl_chain_free(chain);
+
+	return MNL_CB_OK;
+}
+
+/*
+ * Request a dump of all chains of @family and collect them in a newly
+ * allocated list.  Returns the list, or NULL if mnl_talk() failed.
+ */
+static struct nftnl_chain_list *mnl_chain_dump(struct mnl_socket *nf_sock,
+					       int family)
+{
+	char buf[MNL_SOCKET_BUFFER_SIZE];
+	struct nftnl_chain_list *chains;
+	struct nlmsghdr *hdr;
+
+	chains = nftnl_chain_list_alloc();
+	if (!chains)
+		memory_allocation_error();
+
+	hdr = nftnl_chain_nlmsg_build_hdr(buf, NFT_MSG_GETCHAIN, family,
+					  NLM_F_DUMP, seq);
+
+	if (mnl_talk(nf_sock, hdr, hdr->nlmsg_len, chain_cb, chains) >= 0)
+		return chains;
+
+	nftnl_chain_list_free(chains);
+	return NULL;
+}
+
+/*
+ * Table
+ */
+/*
+ * mnl_cb_run() callback: parse one table message and add the table to
+ * the ruleset passed in @data.
+ *
+ * NFTNL_RULESET_TABLELIST holds a struct nftnl_table_list, not a single
+ * table, so the table is appended to the ruleset's table list.  The list
+ * is created and attached to the ruleset on first use.
+ *
+ * Returns MNL_CB_OK on success and MNL_CB_ERROR if the message could not
+ * be parsed.
+ */
+static int table_cb(const struct nlmsghdr *nlh, void *data)
+{
+	struct nftnl_ruleset *rs = data;
+	struct nftnl_table_list *tables;
+	struct nftnl_table *t;
+
+	t = nftnl_table_alloc();
+	if (t == NULL)
+		memory_allocation_error();
+
+	if (nftnl_table_nlmsg_parse(nlh, t) < 0)
+		goto err_free;
+
+	tables = nftnl_ruleset_get(rs, NFTNL_RULESET_TABLELIST);
+	if (tables == NULL) {
+		tables = nftnl_table_list_alloc();
+		if (tables == NULL)
+			memory_allocation_error();
+
+		nftnl_ruleset_set(rs, NFTNL_RULESET_TABLELIST, tables);
+	}
+
+	nftnl_table_list_add_tail(t, tables);
+
+	return MNL_CB_OK;
+
+err_free:
+	nftnl_table_free(t);
+	return MNL_CB_ERROR;
+}
+
+/*
+ * Set elements
+ */
+/*
+ * mnl_cb_run() callback: parse set element messages into the nftnl_set
+ * passed in @data.  The parse result is not evaluated; MNL_CB_OK is
+ * always returned.
+ */
+static int set_elem_cb(const struct nlmsghdr *nlh, void *data)
+{
+	struct nftnl_set *set = data;
+
+	(void)nftnl_set_elems_nlmsg_parse(nlh, set);
+
+	return MNL_CB_OK;
+}
+
+/*
+ * Request the elements of @nls and have set_elem_cb() parse the replies
+ * into @nls itself.  Returns the result of mnl_talk().
+ */
+static int mnl_setelem_get(struct mnl_socket *nf_sock, struct nftnl_set *nls)
+{
+	char buf[MNL_SOCKET_BUFFER_SIZE];
+	uint32_t set_family;
+	struct nlmsghdr *hdr;
+
+	set_family = nftnl_set_get_u32(nls, NFTNL_SET_FAMILY);
+
+	hdr = nftnl_set_nlmsg_build_hdr(buf, NFT_MSG_GETSETELEM, set_family,
+					NLM_F_DUMP|NLM_F_ACK, seq);
+	nftnl_set_nlmsg_build_payload(hdr, nls);
+
+	return mnl_talk(nf_sock, hdr, hdr->nlmsg_len, set_elem_cb, nls);
+}
+
+/*
+ * Set
+ */
+/*
+ * mnl_cb_run() callback: parse one set message and append the set to the
+ * nftnl_set_list passed in @data.  Messages that fail to parse are
+ * dropped; MNL_CB_OK is returned either way.
+ */
+static int set_cb(const struct nlmsghdr *nlh, void *data)
+{
+	struct nftnl_set_list *sets = data;
+	struct nftnl_set *set = nftnl_set_alloc();
+
+	if (!set)
+		memory_allocation_error();
+
+	if (nftnl_set_nlmsg_parse(nlh, set) >= 0)
+		nftnl_set_list_add_tail(set, sets);
+	else
+		nftnl_set_free(set);
+
+	return MNL_CB_OK;
+}
+
+/*
+ * Request a dump of all sets of @family, then fetch the elements of each
+ * set that was returned.
+ *
+ * Returns the newly allocated set list, or NULL if the dump or any of
+ * the element requests failed.
+ */
+static struct nftnl_set_list *
+mnl_set_dump(struct mnl_socket *nf_sock, int family)
+{
+	char buf[MNL_SOCKET_BUFFER_SIZE];
+	struct nftnl_set_list_iter *it;
+	struct nftnl_set_list *sets;
+	struct nftnl_set *tmpl, *cur;
+	struct nlmsghdr *hdr;
+
+	/* an empty set object is only needed to build the request payload */
+	tmpl = nftnl_set_alloc();
+	if (!tmpl)
+		memory_allocation_error();
+
+	hdr = nftnl_set_nlmsg_build_hdr(buf, NFT_MSG_GETSET, family,
+					NLM_F_DUMP|NLM_F_ACK, seq);
+	nftnl_set_nlmsg_build_payload(hdr, tmpl);
+	nftnl_set_free(tmpl);
+
+	sets = nftnl_set_list_alloc();
+	if (!sets)
+		memory_allocation_error();
+
+	if (mnl_talk(nf_sock, hdr, hdr->nlmsg_len, set_cb, sets) < 0)
+		goto err;
+
+	it = nftnl_set_list_iter_create(sets);
+	if (!it)
+		memory_allocation_error();
+
+	for (cur = nftnl_set_list_iter_next(it); cur != NULL;
+	     cur = nftnl_set_list_iter_next(it)) {
+		if (mnl_setelem_get(nf_sock, cur) == 0)
+			continue;
+
+		perror("E: Unable to get set elements");
+		nftnl_set_list_iter_destroy(it);
+		goto err;
+	}
+
+	nftnl_set_list_iter_destroy(it);
+
+	return sets;
+err:
+	nftnl_set_list_free(sets);
+	return NULL;
+}
+
+/*
+ * Allocate a ruleset and request the table named @table of @family;
+ * replies are handled by table_cb() with the ruleset as its argument.
+ *
+ * Returns the ruleset, or NULL if mnl_talk() failed.
+ */
+static struct nftnl_ruleset *mnl_table_ruleset(struct mnl_socket *nf_sock,
+					       int family,
+					       const char *table)
+{
+	char buf[MNL_SOCKET_BUFFER_SIZE];
+	struct nftnl_ruleset *ruleset;
+	struct nftnl_table *tmpl;
+	struct nlmsghdr *hdr;
+
+	hdr = nftnl_table_nlmsg_build_hdr(buf, NFT_MSG_GETTABLE, family,
+					  NLM_F_ACK, seq);
+
+	/* temporary table object, only used to put the name in the request */
+	tmpl = nftnl_table_alloc();
+	if (!tmpl)
+		memory_allocation_error();
+
+	nftnl_table_set(tmpl, NFTNL_TABLE_NAME, table);
+	nftnl_table_nlmsg_build_payload(hdr, tmpl);
+	nftnl_table_free(tmpl);
+
+	ruleset = nftnl_ruleset_alloc();
+	if (!ruleset)
+		memory_allocation_error();
+
+	if (mnl_talk(nf_sock, hdr, hdr->nlmsg_len, table_cb, ruleset) >= 0)
+		return ruleset;
+
+	nftnl_ruleset_free(ruleset);
+	return NULL;
+}
+
+/*
+ * Fetch the "filter" table of @family together with the chains, sets and
+ * rules of that family, attach whatever could be dumped to one ruleset
+ * and print the ruleset to stdout.
+ *
+ * Returns the ruleset, or NULL if the table request failed.  A failed
+ * chain, set or rule dump just leaves that part of the ruleset unset.
+ */
+static struct nftnl_ruleset *mnl_ruleset_dump(struct mnl_socket *nf_sock, int family)
+{
+	uint32_t output_type = NFTNL_OUTPUT_DEFAULT;
+	struct nftnl_chain_list *chains;
+	struct nftnl_rule_list *rules;
+	struct nftnl_set_list *sets;
+	struct nftnl_ruleset *ruleset;
+
+	ruleset = mnl_table_ruleset(nf_sock, family, "filter");
+	if (ruleset == NULL)
+		return NULL;
+
+	chains = mnl_chain_dump(nf_sock, family);
+	if (chains)
+		nftnl_ruleset_set(ruleset, NFTNL_RULESET_CHAINLIST, chains);
+
+	sets = mnl_set_dump(nf_sock, family);
+	if (sets)
+		nftnl_ruleset_set(ruleset, NFTNL_RULESET_SETLIST, sets);
+
+	rules = mnl_rule_dump(nf_sock, family);
+	if (rules)
+		nftnl_ruleset_set(ruleset, NFTNL_RULESET_RULELIST, rules);
+
+	nftnl_ruleset_fprintf(stdout, ruleset, output_type, 0);
+
+	return ruleset;
+}
+
+/*
+ * ether type ne 0x800 accept
+ *
+ * Adds one rule to @state: compare the 16 bit h_proto field of the
+ * ethernet header against ETH_P_IP (network byte order) and pass the
+ * packet if it differs.
+ *
+ * Returns the result of imr_state_rule_end() on success, -ENOMEM if an
+ * object could not be allocated, or the non-zero value returned by
+ * imr_state_rule_begin() / imr_state_add_obj().
+ */
+static int nft_ipv4_only(struct imr_state *state)
+{
+	struct imr_object *eth_p_ip, *lltype, *relop, *verdict;
+	int ret;
+
+	ret = imr_state_rule_begin(state);
+	if (ret < 0)
+		return ret;
+
+	lltype = imr_object_alloc_payload(IMR_PAYLOAD_BASE_LL,
+					  offsetof(struct ethhdr, h_proto),
+					  sizeof(uint16_t));
+	if (!lltype)
+		return -ENOMEM;
+
+	eth_p_ip = imr_object_alloc_imm32(htons(ETH_P_IP));
+	if (!eth_p_ip) {
+		imr_object_free(lltype);
+		return -ENOMEM;
+	}
+
+	relop = imr_object_alloc_relational(IMR_RELOP_NE, lltype, eth_p_ip);
+	if (!relop) {
+		imr_object_free(eth_p_ip);
+		imr_object_free(lltype);
+		return -ENOMEM;
+	}
+
+	/* on failure the object is still ours, as in netlink_parse_immediate() */
+	ret = imr_state_add_obj(state, relop);
+	if (ret < 0)
+		imr_object_free(relop);
+	if (ret != 0)
+		return ret;
+
+	verdict = imr_object_alloc_verdict(IMR_VERDICT_PASS);
+	if (!verdict)
+		return -ENOMEM;
+
+	ret = imr_state_add_obj(state, verdict);
+	if (ret < 0)
+		imr_object_free(verdict);
+	if (ret != 0)
+		return ret;
+
+	return imr_state_rule_end(state);
+}
+
+/*
+ * Translate the rule list of @rs into an IMR state, print that state to
+ * stdout and pass it to imr_do_bpf().
+ *
+ * The state starts with the rule added by nft_ipv4_only(), followed by
+ * one IMR rule per nftables rule.
+ *
+ * Returns 0 on success, -ENOENT if @rs carries no rule list, -ENOMEM if
+ * the state could not be allocated, or the negative value reported by
+ * nft_ipv4_only() / the rule translation.
+ */
+static int nft2imr(const struct nftnl_ruleset *rs)
+{
+	struct nftnl_rule_list *l = nftnl_ruleset_get(rs, NFTNL_RULESET_RULELIST);
+	struct imr_state *state;
+	int ret;
+
+	/* mnl_ruleset_dump() leaves the rule list unset if the dump failed */
+	if (!l)
+		return -ENOENT;
+
+	state = imr_state_alloc();
+	if (!state)
+		return -ENOMEM;
+
+	ret = nft_ipv4_only(state);
+	if (ret < 0)
+		goto out;
+
+	ret = nftnl_rule_list_foreach(l, rule_parse_cb, state);
+	if (ret < 0)
+		goto out;
+
+	imr_state_print(stdout, state);
+	imr_do_bpf(state);
+	ret = 0;
+out:
+	imr_state_free(state);
+	return ret;
+}
+
+/*
+ * Dump the IPv4 ruleset over a netfilter netlink socket and translate it
+ * with nft2imr().
+ *
+ * Exits with EXIT_FAILURE if more than one argument is given, if the
+ * socket cannot be opened or bound, if the ruleset cannot be dumped or if
+ * the translation reports an error; EXIT_SUCCESS otherwise.
+ */
+int main(int argc, char *argv[])
+{
+	struct mnl_socket *nl;
+	struct nftnl_ruleset *rs;
+	int ret;
+
+	if (argc > 2) {
+		fprintf(stderr, "%s {json}\n",
+			argv[0]);
+		exit(EXIT_FAILURE);
+	}
+
+	nl = mnl_socket_open(NETLINK_NETFILTER);
+	if (nl == NULL) {
+		perror("mnl_socket_open");
+		exit(EXIT_FAILURE);
+	}
+
+	if (mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID) < 0) {
+		perror("mnl_socket_bind");
+		exit(EXIT_FAILURE);
+	}
+
+	seq = time(NULL);
+
+	rs = mnl_ruleset_dump(nl, NFPROTO_IPV4);
+	if (rs == NULL) {
+		perror("ruleset_dump");
+		exit(EXIT_FAILURE);
+	}
+
+	ret = nft2imr(rs);
+
+	/* rs is not released here; the process exits right after this */
+	mnl_socket_close(nl);
+
+	/* nft2imr() reports negative errno values, which are not exit codes */
+	return ret < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
+}
--
2.16.1
Powered by blists - more mailing lists