[<prev] [next>] [day] [month] [year] [list]
Message-Id: <20251210172554.1071864-1-jhs@mojatatu.com>
Date: Wed, 10 Dec 2025 12:25:54 -0500
From: Jamal Hadi Salim <jhs@...atatu.com>
To: davem@...emloft.net,
edumazet@...gle.com,
kuba@...nel.org,
pabeni@...hat.com,
horms@...nel.org
Cc: netdev@...r.kernel.org,
xiyou.wangcong@...il.com,
jiri@...nulli.us,
victor@...atatu.com,
dcaratti@...hat.com,
lariel@...dia.com,
daniel@...earbox.net,
pablo@...filter.org,
Jamal Hadi Salim <jhs@...atatu.com>
Subject: [PATCH RFC net 1/1] net: sched: Fix ethx:ingress -> ethy:egress -> ethx:ingress mirred loop
This patch could be broken down into multiple patches(at least two), but
posted as one because it is an RFC.
When mirred redirects from egress to ingress the loop state is lost.
This is because the current loop detection mechanism depends on the device
being rememebred on the sched_mirred_dev array; however, that array is
cleared when we go from egress->ingress because the packet ends up in the
backlog and when we restart from the backlog the loop is amplified, on and
on...
A simple test case:
tc qdisc add dev ethx clsact
tc qdisc add dev ethy clsact
tc filter add dev ethx ingress protocol ip \
prio 10 matchall action mirred egress redirect dev ethy
tc filter add dev ethy egress protocol ip \
prio 10 matchall action mirred ingress redirect dev ethx
ping such that packets arrive on ethx. Puff and sweat while the cpu
consumption goes up. Or just delete those two qdiscs from above
on ethx and ethy.
For this to work we need to _remember the loop state in the skb_.
We reclaim the bit "skb->from_ingress" to the qdisc_skb_cb since its use
is constrained for ifb. We then use an extra bit that was available on
the skb for a total of 2 "skb->ttl" bits.
Mirred increments the ttl whenever it sees the same skb. We then
catch it when it exceeds MIRRED_NEST_LIMIT iterations of the loop.
Signed-off-by: Jamal Hadi Salim <jhs@...atatu.com>
---
drivers/net/ifb.c | 3 +--
include/linux/skbuff.h | 35 +++--------------------------------
include/net/sch_generic.h | 30 ++++++++++++++++++++++++++++++
net/sched/act_mirred.c | 5 ++++-
4 files changed, 38 insertions(+), 35 deletions(-)
diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c
index d3dc0914450a..4783d479d1d6 100644
--- a/drivers/net/ifb.c
+++ b/drivers/net/ifb.c
@@ -123,8 +123,7 @@ static void ifb_ri_tasklet(struct tasklet_struct *t)
}
rcu_read_unlock();
skb->skb_iif = txp->dev->ifindex;
-
- if (!skb->from_ingress) {
+ if (!qdisc_skb_cb(skb)->from_ingress) {
dev_queue_xmit(skb);
} else {
skb_pull_rcsum(skb, skb->mac_len);
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 86737076101d..2e76d84eddf8 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1000,6 +1000,9 @@ struct sk_buff {
/* Indicates the inner headers are valid in the skbuff. */
__u8 encapsulation:1;
__u8 encap_hdr_csum:1;
+#ifdef CONFIG_NET_REDIRECT
+ __u8 ttl:2;
+#endif
__u8 csum_valid:1;
#ifdef CONFIG_IPV6_NDISC_NODETYPE
__u8 ndisc_nodetype:2;
@@ -1016,9 +1019,6 @@ struct sk_buff {
__u8 offload_l3_fwd_mark:1;
#endif
__u8 redirected:1;
-#ifdef CONFIG_NET_REDIRECT
- __u8 from_ingress:1;
-#endif
#ifdef CONFIG_NETFILTER_SKIP_EGRESS
__u8 nf_skip_egress:1;
#endif
@@ -5347,35 +5347,6 @@ static inline __wsum lco_csum(struct sk_buff *skb)
return csum_partial(l4_hdr, csum_start - l4_hdr, partial);
}
-static inline bool skb_is_redirected(const struct sk_buff *skb)
-{
- return skb->redirected;
-}
-
-static inline void skb_set_redirected(struct sk_buff *skb, bool from_ingress)
-{
- skb->redirected = 1;
-#ifdef CONFIG_NET_REDIRECT
- skb->from_ingress = from_ingress;
- if (skb->from_ingress)
- skb_clear_tstamp(skb);
-#endif
-}
-
-static inline void skb_reset_redirect(struct sk_buff *skb)
-{
- skb->redirected = 0;
-}
-
-static inline void skb_set_redirected_noclear(struct sk_buff *skb,
- bool from_ingress)
-{
- skb->redirected = 1;
-#ifdef CONFIG_NET_REDIRECT
- skb->from_ingress = from_ingress;
-#endif
-}
-
static inline bool skb_csum_is_sctp(struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_IP_SCTP)
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index c3a7268b567e..7580ccb65ba5 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -459,6 +459,7 @@ struct qdisc_skb_cb {
u8 post_ct:1;
u8 post_ct_snat:1;
u8 post_ct_dnat:1;
+ u8 from_ingress:1;
};
typedef void tcf_chain_head_change_t(struct tcf_proto *tp_head, void *priv);
@@ -1140,6 +1141,35 @@ static inline void qdisc_dequeue_drop(struct Qdisc *q, struct sk_buff *skb,
q->to_free = skb;
}
+static inline bool skb_is_redirected(const struct sk_buff *skb)
+{
+ return skb->redirected;
+}
+
+static inline void skb_set_redirected(struct sk_buff *skb, bool from_ingress)
+{
+ skb->redirected = 1;
+#ifdef CONFIG_NET_REDIRECT
+ qdisc_skb_cb(skb)->from_ingress = from_ingress;
+ if (qdisc_skb_cb(skb)->from_ingress)
+ skb_clear_tstamp(skb);
+#endif
+}
+
+static inline void skb_reset_redirect(struct sk_buff *skb)
+{
+ skb->redirected = 0;
+}
+
+static inline void skb_set_redirected_noclear(struct sk_buff *skb,
+ bool from_ingress)
+{
+ skb->redirected = 1;
+#ifdef CONFIG_NET_REDIRECT
+ qdisc_skb_cb(skb)->from_ingress = from_ingress;
+#endif
+}
+
/* Instead of calling kfree_skb() while root qdisc lock is held,
* queue the skb for future freeing at end of __dev_xmit_skb()
*/
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 91c96cc625bd..fec5a5763fcb 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -318,8 +318,10 @@ static int tcf_mirred_to_dev(struct sk_buff *skb, struct tcf_mirred *m,
skb_set_redirected(skb_to_send, skb_to_send->tc_at_ingress);
+ skb_to_send->ttl++;
err = tcf_mirred_forward(at_ingress, want_ingress, skb_to_send);
} else {
+ skb_to_send->ttl++;
err = tcf_mirred_forward(at_ingress, want_ingress, skb_to_send);
}
if (err)
@@ -434,7 +436,8 @@ TC_INDIRECT_SCOPE int tcf_mirred_act(struct sk_buff *skb,
#else
xmit = this_cpu_ptr(&softnet_data.xmit);
#endif
- if (unlikely(xmit->sched_mirred_nest >= MIRRED_NEST_LIMIT)) {
+
+ if (skb->ttl >= MIRRED_NEST_LIMIT - 1) {
net_warn_ratelimited("Packet exceeded mirred recursion limit on dev %s\n",
netdev_name(skb->dev));
return TC_ACT_SHOT;
--
2.34.1
Powered by blists - more mailing lists