[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250408142425.95437-3-ericwouds@gmail.com>
Date: Tue, 8 Apr 2025 16:24:25 +0200
From: Eric Woudstra <ericwouds@...il.com>
To: Michal Ostrowski <mostrows@...thlink.net>,
Andrew Lunn <andrew+netdev@...n.ch>,
"David S. Miller" <davem@...emloft.net>,
Eric Dumazet <edumazet@...gle.com>,
Jakub Kicinski <kuba@...nel.org>,
Paolo Abeni <pabeni@...hat.com>,
Pablo Neira Ayuso <pablo@...filter.org>,
Jozsef Kadlecsik <kadlec@...filter.org>,
Simon Horman <horms@...nel.org>,
Nikolay Aleksandrov <razor@...ckwall.org>
Cc: netdev@...r.kernel.org,
netfilter-devel@...r.kernel.org,
linux-hardening@...r.kernel.org,
Eric Woudstra <ericwouds@...il.com>
Subject: [PATCH v11 nf-next 2/2] netfilter: nf_flow_table_offload: Add nf_flow_encap_push() for xmit direct
Loosely based on wenxu's patches:
"nf_flow_table_offload: offload the vlan/PPPoE encap in the flowtable".
Fixed double vlan and pppoe packets, almost entirely rewriting the patch.
When there is no extra vlan-device or pppoe-device added to the fastpath,
it may still be possible that the other tuple has encaps.
This is the case when there is only a bridge in the forward-fastpath,
without a vlan-device. When the bridge is tagging at ingress and keeping
the tag at egress, the other tuple will have an encap.
It will also be the case in the future bridge-fastpath.
In these cases it is necessary to push these encaps.
This patch adds nf_flow_encap_push() and alters nf_flow_queue_xmit()
to call it, only when (tuple.out.ifidx == tuple.out.hw_ifidx).
Signed-off-by: Eric Woudstra <ericwouds@...il.com>
---
net/netfilter/nf_flow_table_ip.c | 97 +++++++++++++++++++++++++++++++-
1 file changed, 95 insertions(+), 2 deletions(-)
diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index 8cd4cf7ae211..64a12b9668e7 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -306,6 +306,92 @@ static bool nf_flow_skb_encap_protocol(struct sk_buff *skb, __be16 proto,
return false;
}
+static int nf_flow_vlan_inner_push(struct sk_buff *skb, __be16 proto, u16 id)
+{ /* Push a VLAN header in front of skb->data; returns 0, or -1 if skb_cow_head() fails. */
+ struct vlan_hdr *vhdr;
+
+ if (skb_cow_head(skb, VLAN_HLEN)) /* non-zero result: give up before touching the skb */
+ return -1;
+
+ __skb_push(skb, VLAN_HLEN);
+ skb_reset_network_header(skb); /* network header now starts at the new VLAN header */
+
+ vhdr = (struct vlan_hdr *)(skb->data);
+ vhdr->h_vlan_TCI = htons(id); /* id is written as the whole TCI field */
+ vhdr->h_vlan_encapsulated_proto = skb->protocol; /* previous protocol becomes the inner one */
+ skb->protocol = proto; /* skb->protocol takes the caller-supplied proto */
+
+ return 0;
+}
+
+static int nf_flow_ppoe_push(struct sk_buff *skb, u16 id) /* NOTE(review): name reads "ppoe"; protocol is PPPoE */
+{ /* Push a PPPoE session header plus PPP protocol field; returns 0 on success, -1 otherwise. */
+ struct ppp_hdr {
+ struct pppoe_hdr hdr;
+ __be16 proto; /* PPP protocol id stored right after the PPPoE header */
+ } *ph;
+ int data_len = skb->len + 2; /* taken before the push; +2 presumably covers the __be16 proto field */
+ __be16 proto;
+
+ if (skb_cow_head(skb, PPPOE_SES_HLEN)) /* runs before the protocol check below */
+ return -1;
+
+ if (skb->protocol == htons(ETH_P_IP)) /* translate the ethertype to its PPP protocol id */
+ proto = htons(PPP_IP);
+ else if (skb->protocol == htons(ETH_P_IPV6))
+ proto = htons(PPP_IPV6);
+ else
+ return -1; /* only IPv4 and IPv6 payloads are handled */
+
+ __skb_push(skb, PPPOE_SES_HLEN);
+ skb_reset_network_header(skb); /* network header now starts at the new PPPoE header */
+
+ ph = (struct ppp_hdr *)(skb->data);
+ ph->hdr.ver = 1;
+ ph->hdr.type = 1;
+ ph->hdr.code = 0;
+ ph->hdr.sid = htons(id); /* id is used as the session id */
+ ph->hdr.length = htons(data_len); /* length computed from the pre-push skb->len */
+ ph->proto = proto;
+ skb->protocol = htons(ETH_P_PPP_SES);
+
+ return 0;
+}
+
+static int nf_flow_encap_push(struct sk_buff *skb,
+ struct flow_offload_tuple_rhash *tuplehash,
+ unsigned short *type) /* in/out: overwritten only when a header is pushed below */
+{ /* Apply the encaps recorded in tuplehash->tuple to skb; returns 0, or -1 when a push helper fails. */
+ int i = 0, ret = 0;
+
+ if (!tuplehash->tuple.encap_num)
+ return 0; /* nothing to push */
+
+ if (tuplehash->tuple.encap[i].proto == htons(ETH_P_8021Q) ||
+ tuplehash->tuple.encap[i].proto == htons(ETH_P_8021AD)) {
+ __vlan_hwaccel_put_tag(skb, tuplehash->tuple.encap[i].proto,
+ tuplehash->tuple.encap[i].id); /* a leading VLAN encap goes through the hwaccel helper, not a pushed header */
+ i++;
+ if (i >= tuplehash->tuple.encap_num)
+ return 0; /* that VLAN was the only encap */
+ }
+
+ switch (tuplehash->tuple.encap[i].proto) { /* i is 0 or 1 here; no default, so other protos leave ret == 0 */
+ case htons(ETH_P_8021Q):
+ *type = ETH_P_8021Q; /* host-order ethertype reported back to the caller */
+ ret = nf_flow_vlan_inner_push(skb,
+ tuplehash->tuple.encap[i].proto,
+ tuplehash->tuple.encap[i].id);
+ break;
+ case htons(ETH_P_PPP_SES):
+ *type = ETH_P_PPP_SES; /* host-order ethertype reported back to the caller */
+ ret = nf_flow_ppoe_push(skb,
+ tuplehash->tuple.encap[i].id);
+ break;
+ }
+ return ret;
+}
+
static void nf_flow_encap_pop(struct sk_buff *skb,
struct flow_offload_tuple_rhash *tuplehash)
{
@@ -335,6 +421,7 @@ static void nf_flow_encap_pop(struct sk_buff *skb,
static unsigned int nf_flow_queue_xmit(struct net *net, struct sk_buff *skb,
const struct flow_offload_tuple_rhash *tuplehash,
+ struct flow_offload_tuple_rhash *other_tuplehash,
unsigned short type)
{
struct net_device *outdev;
@@ -343,6 +430,10 @@ static unsigned int nf_flow_queue_xmit(struct net *net, struct sk_buff *skb,
if (!outdev)
return NF_DROP;
+ if (tuplehash->tuple.out.ifidx == tuplehash->tuple.out.hw_ifidx &&
+ (nf_flow_encap_push(skb, other_tuplehash, &type) < 0))
+ return NF_DROP;
+
skb->dev = outdev;
dev_hard_header(skb, skb->dev, type, tuplehash->tuple.out.h_dest,
tuplehash->tuple.out.h_source, skb->len);
@@ -462,7 +553,8 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
ret = NF_STOLEN;
break;
case FLOW_OFFLOAD_XMIT_DIRECT:
- ret = nf_flow_queue_xmit(state->net, skb, tuplehash, ETH_P_IP);
+ ret = nf_flow_queue_xmit(state->net, skb, tuplehash,
+ &flow->tuplehash[!dir], ETH_P_IP);
if (ret == NF_DROP)
flow_offload_teardown(flow);
break;
@@ -757,7 +849,8 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
ret = NF_STOLEN;
break;
case FLOW_OFFLOAD_XMIT_DIRECT:
- ret = nf_flow_queue_xmit(state->net, skb, tuplehash, ETH_P_IPV6);
+ ret = nf_flow_queue_xmit(state->net, skb, tuplehash,
+ &flow->tuplehash[!dir], ETH_P_IPV6);
if (ret == NF_DROP)
flow_offload_teardown(flow);
break;
--
2.47.1
Powered by blists - more mailing lists