[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20251201-flowtable-offload-ip6ip6-v1-1-1dabf534c074@kernel.org>
Date: Mon, 01 Dec 2025 14:45:13 +0100
From: Lorenzo Bianconi <lorenzo@...nel.org>
To: Pablo Neira Ayuso <pablo@...filter.org>,
Jozsef Kadlecsik <kadlec@...filter.org>, Florian Westphal <fw@...len.de>,
Phil Sutter <phil@....cc>, "David S. Miller" <davem@...emloft.net>,
Eric Dumazet <edumazet@...gle.com>, Jakub Kicinski <kuba@...nel.org>,
Paolo Abeni <pabeni@...hat.com>, Simon Horman <horms@...nel.org>,
David Ahern <dsahern@...nel.org>, Shuah Khan <shuah@...nel.org>
Cc: netfilter-devel@...r.kernel.org, coreteam@...filter.org,
netdev@...r.kernel.org, linux-kselftest@...r.kernel.org,
Lorenzo Bianconi <lorenzo@...nel.org>
Subject: [PATCH RFC nf-next 1/4] netfilter: Introduce tunnel metadata info
in nf_flowtable_ctx struct
This is a preliminary patch to introduce IP6IP6 flowtable acceleration: move the nf_flowtable_ctx definition ahead of nf_flow_tuple_encap(), add tunnel metadata (inner-header offset and tunnel L3 protocol) to it, and pass the context through the encap/decap helpers so the tunnel state detected at lookup time is reused when popping the encapsulation, instead of re-parsing the IP header.
Signed-off-by: Lorenzo Bianconi <lorenzo@...nel.org>
---
net/netfilter/nf_flow_table_ip.c | 80 ++++++++++++++++++++++------------------
1 file changed, 44 insertions(+), 36 deletions(-)
diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index 78883343e5d686014752ec4fe1a28319cbf08845..d28c256d33dc5a8d07490b765747b5c6c48aa67d 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -142,7 +142,18 @@ static bool ip_has_options(unsigned int thoff)
return thoff != sizeof(struct iphdr);
}
-static void nf_flow_tuple_encap(struct sk_buff *skb,
+struct nf_flowtable_ctx {
+ const struct net_device *in;
+ u32 offset;
+ u32 hdrsize;
+ struct {
+ u32 offset;
+ u8 proto;
+ } tun;
+};
+
+static void nf_flow_tuple_encap(struct nf_flowtable_ctx *ctx,
+ struct sk_buff *skb,
struct flow_offload_tuple *tuple)
{
__be16 inner_proto = skb->protocol;
@@ -174,22 +185,15 @@ static void nf_flow_tuple_encap(struct sk_buff *skb,
break;
}
- if (inner_proto == htons(ETH_P_IP)) {
+ if (inner_proto == htons(ETH_P_IP) &&
+ ctx->tun.proto == IPPROTO_IPIP) {
iph = (struct iphdr *)(skb_network_header(skb) + offset);
- if (iph->protocol == IPPROTO_IPIP) {
- tuple->tun.dst_v4.s_addr = iph->daddr;
- tuple->tun.src_v4.s_addr = iph->saddr;
- tuple->tun.l3_proto = IPPROTO_IPIP;
- }
+ tuple->tun.dst_v4.s_addr = iph->daddr;
+ tuple->tun.src_v4.s_addr = iph->saddr;
+ tuple->tun.l3_proto = IPPROTO_IPIP;
}
}
-struct nf_flowtable_ctx {
- const struct net_device *in;
- u32 offset;
- u32 hdrsize;
-};
-
static int nf_flow_tuple_ip(struct nf_flowtable_ctx *ctx, struct sk_buff *skb,
struct flow_offload_tuple *tuple)
{
@@ -257,7 +261,7 @@ static int nf_flow_tuple_ip(struct nf_flowtable_ctx *ctx, struct sk_buff *skb,
tuple->l3proto = AF_INET;
tuple->l4proto = ipproto;
tuple->iifidx = ctx->in->ifindex;
- nf_flow_tuple_encap(skb, tuple);
+ nf_flow_tuple_encap(ctx, skb, tuple);
return 0;
}
@@ -293,15 +297,16 @@ static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb,
return NF_STOLEN;
}
-static bool nf_flow_ip4_tunnel_proto(struct sk_buff *skb, u32 *psize)
+static bool nf_flow_ip4_tunnel_proto(struct nf_flowtable_ctx *ctx,
+ struct sk_buff *skb)
{
struct iphdr *iph;
u16 size;
- if (!pskb_may_pull(skb, sizeof(*iph) + *psize))
+ if (!pskb_may_pull(skb, sizeof(*iph) + ctx->offset))
return false;
- iph = (struct iphdr *)(skb_network_header(skb) + *psize);
+ iph = (struct iphdr *)(skb_network_header(skb) + ctx->offset);
size = iph->ihl << 2;
if (ip_is_fragment(iph) || unlikely(ip_has_options(size)))
@@ -310,25 +315,27 @@ static bool nf_flow_ip4_tunnel_proto(struct sk_buff *skb, u32 *psize)
if (iph->ttl <= 1)
return false;
- if (iph->protocol == IPPROTO_IPIP)
- *psize += size;
+ if (iph->protocol == IPPROTO_IPIP) {
+ ctx->tun.proto = IPPROTO_IPIP;
+ ctx->tun.offset = size;
+ ctx->offset += size;
+ }
return true;
}
-static void nf_flow_ip4_tunnel_pop(struct sk_buff *skb)
+static void nf_flow_ip4_tunnel_pop(struct nf_flowtable_ctx *ctx,
+ struct sk_buff *skb)
{
- struct iphdr *iph = (struct iphdr *)skb_network_header(skb);
-
- if (iph->protocol != IPPROTO_IPIP)
+ if (ctx->tun.proto != IPPROTO_IPIP)
return;
- skb_pull(skb, iph->ihl << 2);
+ skb_pull(skb, ctx->tun.offset);
skb_reset_network_header(skb);
}
-static bool nf_flow_skb_encap_protocol(struct sk_buff *skb, __be16 proto,
- u32 *offset)
+static bool nf_flow_skb_encap_protocol(struct nf_flowtable_ctx *ctx,
+ struct sk_buff *skb, __be16 proto)
{
__be16 inner_proto = skb->protocol;
struct vlan_ethhdr *veth;
@@ -341,7 +348,7 @@ static bool nf_flow_skb_encap_protocol(struct sk_buff *skb, __be16 proto,
veth = (struct vlan_ethhdr *)skb_mac_header(skb);
if (veth->h_vlan_encapsulated_proto == proto) {
- *offset += VLAN_HLEN;
+ ctx->offset += VLAN_HLEN;
inner_proto = proto;
ret = true;
}
@@ -349,19 +356,20 @@ static bool nf_flow_skb_encap_protocol(struct sk_buff *skb, __be16 proto,
case htons(ETH_P_PPP_SES):
if (nf_flow_pppoe_proto(skb, &inner_proto) &&
inner_proto == proto) {
- *offset += PPPOE_SES_HLEN;
+ ctx->offset += PPPOE_SES_HLEN;
ret = true;
}
break;
}
if (inner_proto == htons(ETH_P_IP))
- ret = nf_flow_ip4_tunnel_proto(skb, offset);
+ ret = nf_flow_ip4_tunnel_proto(ctx, skb);
return ret;
}
-static void nf_flow_encap_pop(struct sk_buff *skb,
+static void nf_flow_encap_pop(struct nf_flowtable_ctx *ctx,
+ struct sk_buff *skb,
struct flow_offload_tuple_rhash *tuplehash)
{
struct vlan_hdr *vlan_hdr;
@@ -388,7 +396,7 @@ static void nf_flow_encap_pop(struct sk_buff *skb,
}
if (skb->protocol == htons(ETH_P_IP))
- nf_flow_ip4_tunnel_pop(skb);
+ nf_flow_ip4_tunnel_pop(ctx, skb);
}
struct nf_flow_xmit {
@@ -414,7 +422,7 @@ nf_flow_offload_lookup(struct nf_flowtable_ctx *ctx,
{
struct flow_offload_tuple tuple = {};
- if (!nf_flow_skb_encap_protocol(skb, htons(ETH_P_IP), &ctx->offset))
+ if (!nf_flow_skb_encap_protocol(ctx, skb, htons(ETH_P_IP)))
return NULL;
if (nf_flow_tuple_ip(ctx, skb, &tuple) < 0)
@@ -458,7 +466,7 @@ static int nf_flow_offload_forward(struct nf_flowtable_ctx *ctx,
flow_offload_refresh(flow_table, flow, false);
- nf_flow_encap_pop(skb, tuplehash);
+ nf_flow_encap_pop(ctx, skb, tuplehash);
thoff -= ctx->offset;
iph = ip_hdr(skb);
@@ -836,7 +844,7 @@ static int nf_flow_tuple_ipv6(struct nf_flowtable_ctx *ctx, struct sk_buff *skb,
tuple->l3proto = AF_INET6;
tuple->l4proto = nexthdr;
tuple->iifidx = ctx->in->ifindex;
- nf_flow_tuple_encap(skb, tuple);
+ nf_flow_tuple_encap(ctx, skb, tuple);
return 0;
}
@@ -873,7 +881,7 @@ static int nf_flow_offload_ipv6_forward(struct nf_flowtable_ctx *ctx,
flow_offload_refresh(flow_table, flow, false);
- nf_flow_encap_pop(skb, tuplehash);
+ nf_flow_encap_pop(ctx, skb, tuplehash);
ip6h = ipv6_hdr(skb);
nf_flow_nat_ipv6(flow, skb, dir, ip6h);
@@ -895,7 +903,7 @@ nf_flow_offload_ipv6_lookup(struct nf_flowtable_ctx *ctx,
struct flow_offload_tuple tuple = {};
if (skb->protocol != htons(ETH_P_IPV6) &&
- !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IPV6), &ctx->offset))
+ !nf_flow_skb_encap_protocol(ctx, skb, htons(ETH_P_IPV6)))
return NULL;
if (nf_flow_tuple_ipv6(ctx, skb, &tuple) < 0)
--
2.52.0
Powered by blists - more mailing lists