[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250414172022.242991-2-idosch@nvidia.com>
Date: Mon, 14 Apr 2025 20:20:21 +0300
From: Ido Schimmel <idosch@...dia.com>
To: <netdev@...r.kernel.org>
CC: <davem@...emloft.net>, <kuba@...nel.org>, <pabeni@...hat.com>,
<edumazet@...gle.com>, <dsahern@...nel.org>, <horms@...nel.org>,
<hanhuihui5@...wei.com>, Ido Schimmel <idosch@...dia.com>
Subject: [PATCH net 1/2] net: fib_rules: Fix iif / oif matching on L3 master device
Before commit 40867d74c374 ("net: Add l3mdev index to flow struct and
avoid oif reset for port devices") it was possible to use FIB rules to
match on a L3 domain. This was done by having a FIB rule match on iif /
oif being a L3 master device. It worked because prior to the FIB rule
lookup the iif / oif fields in the flow structure were reset to the
index of the L3 master device to which the input / output device was
enslaved to.
The above scheme made it impossible to match on the original input /
output device. Therefore, cited commit stopped overwriting the iif / oif
fields in the flow structure and instead stored the index of the
enslaving L3 master device in a new field ('flowi_l3mdev') in the flow
structure.
While the change enabled new use cases, it broke the original use case
of matching on a L3 domain. Fix this by interpreting the iif / oif
matching on a L3 master device as a match against the L3 domain. In
other words, if the iif / oif in the FIB rule points to a L3 master
device, compare the provided index against 'flowi_l3mdev' rather than
'flowi_{i,o}if'.
Before cited commit, a FIB rule that matched on 'iif vrf1' would only
match incoming traffic from devices enslaved to 'vrf1'. With the
proposed change (i.e., comparing against 'flowi_l3mdev'), the rule would
also match traffic originating from a socket bound to 'vrf1'. Avoid that
by adding a new flow flag ('FLOWI_FLAG_L3MDEV_OIF') that indicates if
the L3 domain was derived from the output interface or the input
interface (when not set) and take this flag into account when evaluating
the FIB rule against the flow structure.
Avoid unnecessary checks in the data path by detecting that a rule
matches on a L3 master device when the rule is installed and marking it
as such.
Tested using the following script [1].
Output before 40867d74c374 (v5.4.291):
default dev dummy1 table 100 scope link
default dev dummy1 table 200 scope link
Output after 40867d74c374:
default dev dummy1 table 300 scope link
default dev dummy1 table 300 scope link
Output with this patch:
default dev dummy1 table 100 scope link
default dev dummy1 table 200 scope link
[1]
#!/bin/bash
ip link add name vrf1 up type vrf table 10
ip link add name dummy1 up master vrf1 type dummy
sysctl -wq net.ipv4.conf.all.forwarding=1
sysctl -wq net.ipv4.conf.all.rp_filter=0
ip route add table 100 default dev dummy1
ip route add table 200 default dev dummy1
ip route add table 300 default dev dummy1
ip rule add prio 0 oif vrf1 table 100
ip rule add prio 1 iif vrf1 table 200
ip rule add prio 2 table 300
ip route get 192.0.2.1 oif dummy1 fibmatch
ip route get 192.0.2.1 iif dummy1 from 198.51.100.1 fibmatch
Fixes: 40867d74c374 ("net: Add l3mdev index to flow struct and avoid oif reset for port devices")
Reported-by: hanhuihui <hanhuihui5@...wei.com>
Closes: https://lore.kernel.org/netdev/ec671c4f821a4d63904d0da15d604b75@huawei.com/
Signed-off-by: Ido Schimmel <idosch@...dia.com>
---
include/net/fib_rules.h | 2 ++
include/net/flow.h | 1 +
include/net/l3mdev.h | 27 +++++++++++++++++++++++
net/core/fib_rules.c | 48 ++++++++++++++++++++++++++++++++++-------
net/l3mdev/l3mdev.c | 4 +++-
5 files changed, 73 insertions(+), 9 deletions(-)
diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
index 5927910ec06e..6e68e359ad18 100644
--- a/include/net/fib_rules.h
+++ b/include/net/fib_rules.h
@@ -45,6 +45,8 @@ struct fib_rule {
struct fib_rule_port_range dport_range;
u16 sport_mask;
u16 dport_mask;
+ u8 iif_is_l3_master;
+ u8 oif_is_l3_master;
struct rcu_head rcu;
};
diff --git a/include/net/flow.h b/include/net/flow.h
index 335bbc52171c..2a3f0c42f092 100644
--- a/include/net/flow.h
+++ b/include/net/flow.h
@@ -38,6 +38,7 @@ struct flowi_common {
__u8 flowic_flags;
#define FLOWI_FLAG_ANYSRC 0x01
#define FLOWI_FLAG_KNOWN_NH 0x02
+#define FLOWI_FLAG_L3MDEV_OIF 0x04
__u32 flowic_secid;
kuid_t flowic_uid;
__u32 flowic_multipath_hash;
diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h
index f7fe796e8429..1eb8dad18f7e 100644
--- a/include/net/l3mdev.h
+++ b/include/net/l3mdev.h
@@ -59,6 +59,20 @@ int l3mdev_ifindex_lookup_by_table_id(enum l3mdev_type l3type, struct net *net,
int l3mdev_fib_rule_match(struct net *net, struct flowi *fl,
struct fib_lookup_arg *arg);
+static inline
+bool l3mdev_fib_rule_iif_match(const struct flowi *fl, int iifindex)
+{
+ return !(fl->flowi_flags & FLOWI_FLAG_L3MDEV_OIF) &&
+ fl->flowi_l3mdev == iifindex;
+}
+
+static inline
+bool l3mdev_fib_rule_oif_match(const struct flowi *fl, int oifindex)
+{
+ return fl->flowi_flags & FLOWI_FLAG_L3MDEV_OIF &&
+ fl->flowi_l3mdev == oifindex;
+}
+
void l3mdev_update_flow(struct net *net, struct flowi *fl);
int l3mdev_master_ifindex_rcu(const struct net_device *dev);
@@ -327,6 +341,19 @@ int l3mdev_fib_rule_match(struct net *net, struct flowi *fl,
{
return 1;
}
+
+static inline
+bool l3mdev_fib_rule_iif_match(const struct flowi *fl, int iifindex)
+{
+ return false;
+}
+
+static inline
+bool l3mdev_fib_rule_oif_match(const struct flowi *fl, int oifindex)
+{
+ return false;
+}
+
static inline
void l3mdev_update_flow(struct net *net, struct flowi *fl)
{
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 4bc64d912a1c..7af302080a66 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -257,6 +257,24 @@ static int nla_put_port_range(struct sk_buff *skb, int attrtype,
return nla_put(skb, attrtype, sizeof(*range), range);
}
+static bool fib_rule_iif_match(const struct fib_rule *rule, int iifindex,
+ const struct flowi *fl)
+{
+ u8 iif_is_l3_master = READ_ONCE(rule->iif_is_l3_master);
+
+ return iif_is_l3_master ? l3mdev_fib_rule_iif_match(fl, iifindex) :
+ fl->flowi_iif == iifindex;
+}
+
+static bool fib_rule_oif_match(const struct fib_rule *rule, int oifindex,
+ const struct flowi *fl)
+{
+ u8 oif_is_l3_master = READ_ONCE(rule->oif_is_l3_master);
+
+ return oif_is_l3_master ? l3mdev_fib_rule_oif_match(fl, oifindex) :
+ fl->flowi_oif == oifindex;
+}
+
static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops,
struct flowi *fl, int flags,
struct fib_lookup_arg *arg)
@@ -264,11 +282,11 @@ static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops,
int iifindex, oifindex, ret = 0;
iifindex = READ_ONCE(rule->iifindex);
- if (iifindex && (iifindex != fl->flowi_iif))
+ if (iifindex && !fib_rule_iif_match(rule, iifindex, fl))
goto out;
oifindex = READ_ONCE(rule->oifindex);
- if (oifindex && (oifindex != fl->flowi_oif))
+ if (oifindex && !fib_rule_oif_match(rule, oifindex, fl))
goto out;
if ((rule->mark ^ fl->flowi_mark) & rule->mark_mask)
@@ -736,16 +754,20 @@ static int fib_nl2rule_rtnl(struct fib_rule *nlrule,
struct net_device *dev;
dev = __dev_get_by_name(nlrule->fr_net, nlrule->iifname);
- if (dev)
+ if (dev) {
nlrule->iifindex = dev->ifindex;
+ nlrule->iif_is_l3_master = netif_is_l3_master(dev);
+ }
}
if (tb[FRA_OIFNAME]) {
struct net_device *dev;
dev = __dev_get_by_name(nlrule->fr_net, nlrule->oifname);
- if (dev)
+ if (dev) {
nlrule->oifindex = dev->ifindex;
+ nlrule->oif_is_l3_master = netif_is_l3_master(dev);
+ }
}
return 0;
@@ -1336,11 +1358,17 @@ static void attach_rules(struct list_head *rules, struct net_device *dev)
list_for_each_entry(rule, rules, list) {
if (rule->iifindex == -1 &&
- strcmp(dev->name, rule->iifname) == 0)
+ strcmp(dev->name, rule->iifname) == 0) {
WRITE_ONCE(rule->iifindex, dev->ifindex);
+ WRITE_ONCE(rule->iif_is_l3_master,
+ netif_is_l3_master(dev));
+ }
if (rule->oifindex == -1 &&
- strcmp(dev->name, rule->oifname) == 0)
+ strcmp(dev->name, rule->oifname) == 0) {
WRITE_ONCE(rule->oifindex, dev->ifindex);
+ WRITE_ONCE(rule->oif_is_l3_master,
+ netif_is_l3_master(dev));
+ }
}
}
@@ -1349,10 +1377,14 @@ static void detach_rules(struct list_head *rules, struct net_device *dev)
struct fib_rule *rule;
list_for_each_entry(rule, rules, list) {
- if (rule->iifindex == dev->ifindex)
+ if (rule->iifindex == dev->ifindex) {
WRITE_ONCE(rule->iifindex, -1);
- if (rule->oifindex == dev->ifindex)
+ WRITE_ONCE(rule->iif_is_l3_master, false);
+ }
+ if (rule->oifindex == dev->ifindex) {
WRITE_ONCE(rule->oifindex, -1);
+ WRITE_ONCE(rule->oif_is_l3_master, false);
+ }
}
}
diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c
index ca10916340b0..5432a5f2dfc8 100644
--- a/net/l3mdev/l3mdev.c
+++ b/net/l3mdev/l3mdev.c
@@ -277,8 +277,10 @@ void l3mdev_update_flow(struct net *net, struct flowi *fl)
if (fl->flowi_oif) {
dev = dev_get_by_index_rcu(net, fl->flowi_oif);
if (dev) {
- if (!fl->flowi_l3mdev)
+ if (!fl->flowi_l3mdev) {
fl->flowi_l3mdev = l3mdev_master_ifindex_rcu(dev);
+ fl->flowi_flags |= FLOWI_FLAG_L3MDEV_OIF;
+ }
/* oif set to L3mdev directs lookup to its table;
* reset to avoid oif match in fib_lookup
--
2.49.0
Powered by blists - more mailing lists