lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <0b153a5ab818dff51110f81550a4050538605a4b.1667252314.git.dcaratti@redhat.com>
Date:   Mon, 31 Oct 2022 22:44:26 +0100
From:   Davide Caratti <dcaratti@...hat.com>
To:     dcaratti@...hat.com
Cc:     jhs@...atatu.com, jiri@...nulli.us, marcelo.leitner@...il.com,
        netdev@...r.kernel.org, pabeni@...hat.com, wizhao@...hat.com,
        xiyou.wangcong@...il.com, peilin.ye@...edance.com
Subject: [RFC net-next] net/sched: act_mirred: allow mirred ingress through networking backlog

using TC mirrred in the ingress direction, packets are passed directly
to the receiver in the same context. There are a couple of reasons that
justify the proposal to use kernel networking backlog instead:

a) avoid the soft lockup observed with TCP when it sends data+ack after
   receiving packets through mirred (William sees them using OVS,
   something similar can be obtained with a kselftest [1)]
b) avoid packet drops caused by mirred hitting MIRRED_RECURSION_LIMIT
   in the above scenario

however, like Cong pointed out [2], we can't just change mirred redirect to
use the networking backlog: this would break users expectation, because it
would be impossible to know the RX status after a packet has been enqueued
to the backlog.

A possible approach can be to extend the current set of TC mirred "eaction",
so that the application can choose to use the backlog instead of the classic
ingress redirect. This would also ease future decisions of performing a
complete scrub of the skb metadata for those packets, without changing the
behavior of existing TC ingress redirect rules.

Any feedback appreciated, thanks!

[1] https://lore.kernel.org/netdev/33dc43f587ec1388ba456b4915c75f02a8aae226.1663945716.git.dcaratti@redhat.com/
[2] https://lore.kernel.org/netdev/YzCZMHYmk53mQ+HK@pop-os.localdomain/

Signed-off-by: Davide Caratti <dcaratti@...hat.com>
---
 include/net/tc_act/tc_mirred.h        |  3 ++-
 include/uapi/linux/tc_act/tc_mirred.h |  1 +
 net/sched/act_mirred.c                | 29 +++++++++++++++++++++------
 3 files changed, 26 insertions(+), 7 deletions(-)

diff --git a/include/net/tc_act/tc_mirred.h b/include/net/tc_act/tc_mirred.h
index 32ce8ea36950..9e10ad1adb57 100644
--- a/include/net/tc_act/tc_mirred.h
+++ b/include/net/tc_act/tc_mirred.h
@@ -37,7 +37,8 @@ static inline bool is_tcf_mirred_ingress_redirect(const struct tc_action *a)
 {
 #ifdef CONFIG_NET_CLS_ACT
 	if (a->ops && a->ops->id == TCA_ID_MIRRED)
-		return to_mirred(a)->tcfm_eaction == TCA_INGRESS_REDIR;
+		return (to_mirred(a)->tcfm_eaction == TCA_INGRESS_REDIR ||
+			to_mirred(a)->tcfm_eaction == TCA_INGRESS_BACKLOG);
 #endif
 	return false;
 }
diff --git a/include/uapi/linux/tc_act/tc_mirred.h b/include/uapi/linux/tc_act/tc_mirred.h
index 2500a0005d05..e5939a3c9d86 100644
--- a/include/uapi/linux/tc_act/tc_mirred.h
+++ b/include/uapi/linux/tc_act/tc_mirred.h
@@ -9,6 +9,7 @@
 #define TCA_EGRESS_MIRROR 2 /* mirror packet to EGRESS */
 #define TCA_INGRESS_REDIR 3  /* packet redirect to INGRESS*/
 #define TCA_INGRESS_MIRROR 4 /* mirror packet to INGRESS */
+#define TCA_INGRESS_BACKLOG 5 /* packet redirect to INGRESS (using Linux backlog) */
 
 struct tc_mirred {
 	tc_gen;
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index b8ad6ae282c0..9526bc0ee3d2 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -33,7 +33,13 @@ static DEFINE_PER_CPU(unsigned int, mirred_rec_level);
 
 static bool tcf_mirred_is_act_redirect(int action)
 {
-	return action == TCA_EGRESS_REDIR || action == TCA_INGRESS_REDIR;
+	switch (action) {
+	case TCA_EGRESS_REDIR:
+	case TCA_INGRESS_REDIR:
+	case TCA_INGRESS_BACKLOG:
+		return true;
+	}
+	return false;
 }
 
 static bool tcf_mirred_act_wants_ingress(int action)
@@ -44,6 +50,7 @@ static bool tcf_mirred_act_wants_ingress(int action)
 		return false;
 	case TCA_INGRESS_REDIR:
 	case TCA_INGRESS_MIRROR:
+	case TCA_INGRESS_BACKLOG:
 		return true;
 	default:
 		BUG();
@@ -130,6 +137,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
 	case TCA_EGRESS_REDIR:
 	case TCA_INGRESS_REDIR:
 	case TCA_INGRESS_MIRROR:
+	case TCA_INGRESS_BACKLOG:
 		break;
 	default:
 		if (exists)
@@ -205,14 +213,23 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
 	return err;
 }
 
-static int tcf_mirred_forward(bool want_ingress, struct sk_buff *skb)
+static int tcf_mirred_forward(int eaction, struct sk_buff *skb)
 {
 	int err;
 
-	if (!want_ingress)
+	switch (eaction) {
+	case TCA_EGRESS_MIRROR:
+	case TCA_EGRESS_REDIR:
 		err = tcf_dev_queue_xmit(skb, dev_queue_xmit);
-	else
+		break;
+	case TCA_INGRESS_MIRROR:
+	case TCA_INGRESS_REDIR:
 		err = netif_receive_skb(skb);
+		break;
+	case TCA_INGRESS_BACKLOG:
+		err = netif_rx(skb);
+		break;
+	}
 
 	return err;
 }
@@ -305,7 +322,7 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a,
 
 		/* let's the caller reinsert the packet, if possible */
 		if (use_reinsert) {
-			err = tcf_mirred_forward(want_ingress, skb);
+			err = tcf_mirred_forward(m_eaction, skb);
 			if (err)
 				tcf_action_inc_overlimit_qstats(&m->common);
 			__this_cpu_dec(mirred_rec_level);
@@ -313,7 +330,7 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a,
 		}
 	}
 
-	err = tcf_mirred_forward(want_ingress, skb2);
+	err = tcf_mirred_forward(m_eaction, skb2);
 	if (err) {
 out:
 		tcf_action_inc_overlimit_qstats(&m->common);
-- 
2.37.3

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ