[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20260111163947.811248-2-jhs@mojatatu.com>
Date: Sun, 11 Jan 2026 11:39:42 -0500
From: Jamal Hadi Salim <jhs@...atatu.com>
To: davem@...emloft.net,
edumazet@...gle.com,
kuba@...nel.org,
pabeni@...hat.com,
horms@...nel.org,
andrew+netdev@...n.ch
Cc: netdev@...r.kernel.org,
xiyou.wangcong@...il.com,
jiri@...nulli.us,
victor@...atatu.com,
dcaratti@...hat.com,
lariel@...dia.com,
daniel@...earbox.net,
pablo@...filter.org,
kadlec@...filter.org,
fw@...len.de,
phil@....cc,
netfilter-devel@...r.kernel.org,
coreteam@...filter.org,
zyc199902@...omail.cn,
lrGerlinde@...lfence.com,
jschung2@...ton.me,
Jamal Hadi Salim <jhs@...atatu.com>
Subject: [PATCH net 1/6] net: Introduce skb ttl field to track packet loops
In order to keep track of loops across the stack, in particular when going
across from egress->ingress and back, we need to _remember the global loop
state in the skb_.
We introduce a per-skb ttl field to keep track of this state.
The ttl field is two bits wide. This patch frees up those two bits as follows:
1) The bit "skb->from_ingress" is reclaimed for ttl. Since it is currently
only used for ifb, it is safe to move this flag to local-per-layer skb/tc
state on the qdisc_skb_cb struct.
2) A second bit that was already available (unused) on the skb.
Use cases:
1) Mirred increments the ttl whenever it sees an skb. If the skb shows
up multiple times we catch it when it exceeds MIRRED_NEST_LIMIT iterations
of the loop.
2) netem increments the ttl when using the "duplicate" feature and catches the
loop when it sees the packet a second time.
Fixes: fe946a751d9b ("net/sched: act_mirred: add loop detection")
Fixes: 0afb51e72855 ("[PKT_SCHED]: netem: reinsert for duplication")
Tested-by: Victor Nogueira <victor@...atatu.com>
Signed-off-by: Jamal Hadi Salim <jhs@...atatu.com>
---
drivers/net/ifb.c | 2 +-
include/linux/skbuff.h | 24 ++----------------------
include/net/sch_generic.h | 22 ++++++++++++++++++++++
net/netfilter/nft_fwd_netdev.c | 1 +
4 files changed, 26 insertions(+), 23 deletions(-)
diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c
index d3dc0914450a..137a20e4bf8c 100644
--- a/drivers/net/ifb.c
+++ b/drivers/net/ifb.c
@@ -124,7 +124,7 @@ static void ifb_ri_tasklet(struct tasklet_struct *t)
rcu_read_unlock();
skb->skb_iif = txp->dev->ifindex;
- if (!skb->from_ingress) {
+ if (!qdisc_skb_cb(skb)->from_ingress) {
dev_queue_xmit(skb);
} else {
skb_pull_rcsum(skb, skb->mac_len);
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 86737076101d..7f18b0c28728 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -840,6 +840,7 @@ enum skb_tstamp_type {
* @no_fcs: Request NIC to treat last 4 bytes as Ethernet FCS
* @encapsulation: indicates the inner headers in the skbuff are valid
* @encap_hdr_csum: software checksum is needed
+ * @ttl: time to live count when a packet loops.
* @csum_valid: checksum is already valid
* @csum_not_inet: use CRC32c to resolve CHECKSUM_PARTIAL
* @csum_complete_sw: checksum was completed by software
@@ -1000,6 +1001,7 @@ struct sk_buff {
/* Indicates the inner headers are valid in the skbuff. */
__u8 encapsulation:1;
__u8 encap_hdr_csum:1;
+ __u8 ttl:2;
__u8 csum_valid:1;
#ifdef CONFIG_IPV6_NDISC_NODETYPE
__u8 ndisc_nodetype:2;
@@ -1016,9 +1018,6 @@ struct sk_buff {
__u8 offload_l3_fwd_mark:1;
#endif
__u8 redirected:1;
-#ifdef CONFIG_NET_REDIRECT
- __u8 from_ingress:1;
-#endif
#ifdef CONFIG_NETFILTER_SKIP_EGRESS
__u8 nf_skip_egress:1;
#endif
@@ -5352,30 +5351,11 @@ static inline bool skb_is_redirected(const struct sk_buff *skb)
return skb->redirected;
}
-static inline void skb_set_redirected(struct sk_buff *skb, bool from_ingress)
-{
- skb->redirected = 1;
-#ifdef CONFIG_NET_REDIRECT
- skb->from_ingress = from_ingress;
- if (skb->from_ingress)
- skb_clear_tstamp(skb);
-#endif
-}
-
static inline void skb_reset_redirect(struct sk_buff *skb)
{
skb->redirected = 0;
}
-static inline void skb_set_redirected_noclear(struct sk_buff *skb,
- bool from_ingress)
-{
- skb->redirected = 1;
-#ifdef CONFIG_NET_REDIRECT
- skb->from_ingress = from_ingress;
-#endif
-}
-
static inline bool skb_csum_is_sctp(struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_IP_SCTP)
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index c3a7268b567e..42d8a1a9db4c 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -459,6 +459,9 @@ struct qdisc_skb_cb {
u8 post_ct:1;
u8 post_ct_snat:1;
u8 post_ct_dnat:1;
+#ifdef CONFIG_NET_REDIRECT
+ u8 from_ingress:1;
+#endif
};
typedef void tcf_chain_head_change_t(struct tcf_proto *tp_head, void *priv);
@@ -1140,6 +1143,25 @@ static inline void qdisc_dequeue_drop(struct Qdisc *q, struct sk_buff *skb,
q->to_free = skb;
}
+static inline void skb_set_redirected(struct sk_buff *skb, bool from_ingress)
+{
+ skb->redirected = 1;
+#ifdef CONFIG_NET_REDIRECT
+ qdisc_skb_cb(skb)->from_ingress = from_ingress;
+ if (qdisc_skb_cb(skb)->from_ingress)
+ skb_clear_tstamp(skb);
+#endif
+}
+
+static inline void skb_set_redirected_noclear(struct sk_buff *skb,
+ bool from_ingress)
+{
+ skb->redirected = 1;
+#ifdef CONFIG_NET_REDIRECT
+ qdisc_skb_cb(skb)->from_ingress = from_ingress;
+#endif
+}
+
/* Instead of calling kfree_skb() while root qdisc lock is held,
* queue the skb for future freeing at end of __dev_xmit_skb()
*/
diff --git a/net/netfilter/nft_fwd_netdev.c b/net/netfilter/nft_fwd_netdev.c
index 152a9fb4d23a..d62c856ef96a 100644
--- a/net/netfilter/nft_fwd_netdev.c
+++ b/net/netfilter/nft_fwd_netdev.c
@@ -16,6 +16,7 @@
#include <net/netfilter/nf_dup_netdev.h>
#include <net/neighbour.h>
#include <net/ip.h>
+#include <net/sch_generic.h>
struct nft_fwd_netdev {
u8 sreg_dev;
--
2.34.1
Powered by blists - more mailing lists