[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <1f3c874fb825cdc030f729d2e48e6f45f3e3527f.1712347466.git.gnault@redhat.com>
Date: Fri, 5 Apr 2024 22:05:00 +0200
From: Guillaume Nault <gnault@...hat.com>
To: David Miller <davem@...emloft.net>, Jakub Kicinski <kuba@...nel.org>,
Paolo Abeni <pabeni@...hat.com>, Eric Dumazet <edumazet@...gle.com>
Cc: netdev@...r.kernel.org, Mustafa Ismail <mustafa.ismail@...el.com>,
Shiraz Saleem <shiraz.saleem@...el.com>,
Jason Gunthorpe <jgg@...pe.ca>, Leon Romanovsky <leon@...nel.org>,
Michal Kalderon <mkalderon@...vell.com>,
Ariel Elior <aelior@...vell.com>,
Jay Vosburgh <j.vosburgh@...il.com>,
Andy Gospodarek <andy@...yhouse.net>,
David Ahern <dsahern@...nel.org>, Paolo Abeni <pabeni@...hat.com>,
Jozsef Kadlecsik <kadlec@...filter.org>,
Roopa Prabhu <roopa@...dia.com>,
Nikolay Aleksandrov <razor@...ckwall.org>
Subject: [PATCH net-next] ipv4: Set scope explicitly in ip_route_output().
Add a "scope" parameter to ip_route_output() so that callers no longer
have to override the tos parameter with the RTO_ONLINK flag when they
want a link scope lookup (RT_SCOPE_LINK).
This will allow converting flowi4_tos to dscp_t in the future, thus
allowing static analysers to flag invalid interactions between
"tos" (the DSCP bits) and ECN.
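Only three users ask for link scope (bonding, arp and atm); they now pass
RT_SCOPE_LINK explicitly instead of encoding it in tos. All other callers
continue to use RT_SCOPE_UNIVERSE. While there, add a comment to warn
users about the limitations of ip_route_output(): since it only partially
sets the flowi4 structure, the lookup may bypass some fib-rules.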
Signed-off-by: Guillaume Nault <gnault@...hat.com>
---
drivers/infiniband/hw/irdma/cm.c | 3 ++-
drivers/infiniband/hw/qedr/qedr_iw_cm.c | 3 ++-
drivers/net/bonding/bond_main.c | 4 ++--
drivers/net/ethernet/broadcom/cnic.c | 3 ++-
include/net/route.h | 9 ++++++++-
net/atm/clip.c | 2 +-
net/bridge/br_netfilter_hooks.c | 3 ++-
net/ipv4/arp.c | 9 ++++++---
net/ipv4/igmp.c | 3 ++-
net/mpls/af_mpls.c | 2 +-
10 files changed, 28 insertions(+), 13 deletions(-)
diff --git a/drivers/infiniband/hw/irdma/cm.c b/drivers/infiniband/hw/irdma/cm.c
index 1ee7a4e0d8d8..36bb7e5ce638 100644
--- a/drivers/infiniband/hw/irdma/cm.c
+++ b/drivers/infiniband/hw/irdma/cm.c
@@ -1985,7 +1985,8 @@ static int irdma_addr_resolve_neigh(struct irdma_device *iwdev, u32 src_ip,
__be32 dst_ipaddr = htonl(dst_ip);
__be32 src_ipaddr = htonl(src_ip);
- rt = ip_route_output(&init_net, dst_ipaddr, src_ipaddr, 0, 0);
+ rt = ip_route_output(&init_net, dst_ipaddr, src_ipaddr, 0, 0,
+ RT_SCOPE_UNIVERSE);
if (IS_ERR(rt)) {
ibdev_dbg(&iwdev->ibdev, "CM: ip_route_output fail\n");
return -EINVAL;
diff --git a/drivers/infiniband/hw/qedr/qedr_iw_cm.c b/drivers/infiniband/hw/qedr/qedr_iw_cm.c
index a51fc6854984..259303b9907c 100644
--- a/drivers/infiniband/hw/qedr/qedr_iw_cm.c
+++ b/drivers/infiniband/hw/qedr/qedr_iw_cm.c
@@ -447,7 +447,8 @@ qedr_addr4_resolve(struct qedr_dev *dev,
struct rtable *rt = NULL;
int rc = 0;
- rt = ip_route_output(&init_net, dst_ip, src_ip, 0, 0);
+ rt = ip_route_output(&init_net, dst_ip, src_ip, 0, 0,
+ RT_SCOPE_UNIVERSE);
if (IS_ERR(rt)) {
DP_ERR(dev, "ip_route_output returned error\n");
return -EINVAL;
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 2c5ed0a7cb18..c9f0415f780a 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -3014,8 +3014,8 @@ static void bond_arp_send_all(struct bonding *bond, struct slave *slave)
tags = NULL;
/* Find out through which dev should the packet go */
- rt = ip_route_output(dev_net(bond->dev), targets[i], 0,
- RTO_ONLINK, 0);
+ rt = ip_route_output(dev_net(bond->dev), targets[i], 0, 0, 0,
+ RT_SCOPE_LINK);
if (IS_ERR(rt)) {
/* there's no route to target - try to send arp
* probe to generate any traffic (arp_validate=0)
diff --git a/drivers/net/ethernet/broadcom/cnic.c b/drivers/net/ethernet/broadcom/cnic.c
index 3d63177e7e52..c2b4188a1ef1 100644
--- a/drivers/net/ethernet/broadcom/cnic.c
+++ b/drivers/net/ethernet/broadcom/cnic.c
@@ -3682,7 +3682,8 @@ static int cnic_get_v4_route(struct sockaddr_in *dst_addr,
#if defined(CONFIG_INET)
struct rtable *rt;
- rt = ip_route_output(&init_net, dst_addr->sin_addr.s_addr, 0, 0, 0);
+ rt = ip_route_output(&init_net, dst_addr->sin_addr.s_addr, 0, 0, 0,
+ RT_SCOPE_UNIVERSE);
if (!IS_ERR(rt)) {
*dst = &rt->dst;
return 0;
diff --git a/include/net/route.h b/include/net/route.h
index d4a0147942f1..315a8acee6c6 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -141,15 +141,22 @@ static inline struct rtable *ip_route_output_key(struct net *net, struct flowi4
return ip_route_output_flow(net, flp, NULL);
}
+/* Simplistic IPv4 route lookup function.
+ * This is only suitable for some particular use cases: since the flowi4
+ * structure is only partially set, it may bypass some fib-rules.
+ */
static inline struct rtable *ip_route_output(struct net *net, __be32 daddr,
- __be32 saddr, u8 tos, int oif)
+ __be32 saddr, u8 tos, int oif,
+ __u8 scope)
{
struct flowi4 fl4 = {
.flowi4_oif = oif,
.flowi4_tos = tos,
+ .flowi4_scope = scope,
.daddr = daddr,
.saddr = saddr,
};
+
return ip_route_output_key(net, &fl4);
}
diff --git a/net/atm/clip.c b/net/atm/clip.c
index 294cb9efe3d3..362e8d25a79e 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -463,7 +463,7 @@ static int clip_setentry(struct atm_vcc *vcc, __be32 ip)
unlink_clip_vcc(clip_vcc);
return 0;
}
- rt = ip_route_output(&init_net, ip, 0, 1, 0);
+ rt = ip_route_output(&init_net, ip, 0, 0, 0, RT_SCOPE_LINK);
if (IS_ERR(rt))
return PTR_ERR(rt);
neigh = __neigh_lookup(&arp_tbl, &ip, rt->dst.dev, 1);
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 35e10c5a766d..4242447be322 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -399,7 +399,8 @@ static int br_nf_pre_routing_finish(struct net *net, struct sock *sk, struct sk_
goto free_skb;
rt = ip_route_output(net, iph->daddr, 0,
- RT_TOS(iph->tos), 0);
+ RT_TOS(iph->tos), 0,
+ RT_SCOPE_UNIVERSE);
if (!IS_ERR(rt)) {
/* - Bridged-and-DNAT'ed traffic doesn't
* require ip_forwarding. */
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 0d0d725b46ad..ab82ca104496 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -456,7 +456,8 @@ static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev)
/*unsigned long now; */
struct net *net = dev_net(dev);
- rt = ip_route_output(net, sip, tip, 0, l3mdev_master_ifindex_rcu(dev));
+ rt = ip_route_output(net, sip, tip, 0, l3mdev_master_ifindex_rcu(dev),
+ RT_SCOPE_UNIVERSE);
if (IS_ERR(rt))
return 1;
if (rt->dst.dev != dev) {
@@ -1056,7 +1057,8 @@ static int arp_req_set(struct net *net, struct arpreq *r,
if (r->arp_flags & ATF_PERM)
r->arp_flags |= ATF_COM;
if (!dev) {
- struct rtable *rt = ip_route_output(net, ip, 0, RTO_ONLINK, 0);
+ struct rtable *rt = ip_route_output(net, ip, 0, 0, 0,
+ RT_SCOPE_LINK);
if (IS_ERR(rt))
return PTR_ERR(rt);
@@ -1188,7 +1190,8 @@ static int arp_req_delete(struct net *net, struct arpreq *r,
ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr;
if (!dev) {
- struct rtable *rt = ip_route_output(net, ip, 0, RTO_ONLINK, 0);
+ struct rtable *rt = ip_route_output(net, ip, 0, 0, 0,
+ RT_SCOPE_LINK);
if (IS_ERR(rt))
return PTR_ERR(rt);
dev = rt->dst.dev;
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 717e97a389a8..9bf09de6a2e7 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1842,7 +1842,8 @@ static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr)
if (!dev) {
struct rtable *rt = ip_route_output(net,
imr->imr_multiaddr.s_addr,
- 0, 0, 0);
+ 0, 0, 0,
+ RT_SCOPE_UNIVERSE);
if (!IS_ERR(rt)) {
dev = rt->dst.dev;
ip_rt_put(rt);
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index 6dab883a08dd..1303acb9cdd2 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -594,7 +594,7 @@ static struct net_device *inet_fib_lookup_dev(struct net *net,
struct in_addr daddr;
memcpy(&daddr, addr, sizeof(struct in_addr));
- rt = ip_route_output(net, daddr.s_addr, 0, 0, 0);
+ rt = ip_route_output(net, daddr.s_addr, 0, 0, 0, RT_SCOPE_UNIVERSE);
if (IS_ERR(rt))
return ERR_CAST(rt);
--
2.39.2
Powered by blists - more mailing lists