[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <ZhevBkZJBfgLjyLL@Laptop-X1>
Date: Thu, 11 Apr 2024 17:36:06 +0800
From: Hangbin Liu <liuhangbin@...il.com>
To: Guillaume Nault <gnault@...hat.com>
Cc: David Miller <davem@...emloft.net>, Jakub Kicinski <kuba@...nel.org>,
Paolo Abeni <pabeni@...hat.com>, Eric Dumazet <edumazet@...gle.com>,
netdev@...r.kernel.org, Mustafa Ismail <mustafa.ismail@...el.com>,
Shiraz Saleem <shiraz.saleem@...el.com>,
Jason Gunthorpe <jgg@...pe.ca>, Leon Romanovsky <leon@...nel.org>,
Michal Kalderon <mkalderon@...vell.com>,
Ariel Elior <aelior@...vell.com>,
Jay Vosburgh <j.vosburgh@...il.com>,
Andy Gospodarek <andy@...yhouse.net>,
David Ahern <dsahern@...nel.org>,
Jozsef Kadlecsik <kadlec@...filter.org>,
Roopa Prabhu <roopa@...dia.com>,
Nikolay Aleksandrov <razor@...ckwall.org>
Subject: Re: [PATCH net-next] ipv4: Set scope explicitly in ip_route_output().
On Fri, Apr 05, 2024 at 10:05:00PM +0200, Guillaume Nault wrote:
> Add a "scope" parameter to ip_route_output() so that callers don't have
> to override the tos parameter with the RTO_ONLINK flag if they want
> link scope (RT_SCOPE_LINK).
>
> This will allow converting flowi4_tos to dscp_t in the future, thus
> allowing static analysers to flag invalid interactions between
> "tos" (the DSCP bits) and ECN.
>
> Only three users ask for link scope (bonding, arp and atm). The others
> continue to use RT_SCOPE_UNIVERSE. While there, add a comment to warn
> users about the limitations of ip_route_output().
>
> Signed-off-by: Guillaume Nault <gnault@...hat.com>
> ---
> drivers/infiniband/hw/irdma/cm.c | 3 ++-
> drivers/infiniband/hw/qedr/qedr_iw_cm.c | 3 ++-
> drivers/net/bonding/bond_main.c | 4 ++--
> drivers/net/ethernet/broadcom/cnic.c | 3 ++-
> include/net/route.h | 9 ++++++++-
> net/atm/clip.c | 2 +-
> net/bridge/br_netfilter_hooks.c | 3 ++-
> net/ipv4/arp.c | 9 ++++++---
> net/ipv4/igmp.c | 3 ++-
> net/mpls/af_mpls.c | 2 +-
> 10 files changed, 28 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/infiniband/hw/irdma/cm.c b/drivers/infiniband/hw/irdma/cm.c
> index 1ee7a4e0d8d8..36bb7e5ce638 100644
> --- a/drivers/infiniband/hw/irdma/cm.c
> +++ b/drivers/infiniband/hw/irdma/cm.c
> @@ -1985,7 +1985,8 @@ static int irdma_addr_resolve_neigh(struct irdma_device *iwdev, u32 src_ip,
> __be32 dst_ipaddr = htonl(dst_ip);
> __be32 src_ipaddr = htonl(src_ip);
>
> - rt = ip_route_output(&init_net, dst_ipaddr, src_ipaddr, 0, 0);
> + rt = ip_route_output(&init_net, dst_ipaddr, src_ipaddr, 0, 0,
> + RT_SCOPE_UNIVERSE);
> if (IS_ERR(rt)) {
> ibdev_dbg(&iwdev->ibdev, "CM: ip_route_output fail\n");
> return -EINVAL;
> diff --git a/drivers/infiniband/hw/qedr/qedr_iw_cm.c b/drivers/infiniband/hw/qedr/qedr_iw_cm.c
> index a51fc6854984..259303b9907c 100644
> --- a/drivers/infiniband/hw/qedr/qedr_iw_cm.c
> +++ b/drivers/infiniband/hw/qedr/qedr_iw_cm.c
> @@ -447,7 +447,8 @@ qedr_addr4_resolve(struct qedr_dev *dev,
> struct rtable *rt = NULL;
> int rc = 0;
>
> - rt = ip_route_output(&init_net, dst_ip, src_ip, 0, 0);
> + rt = ip_route_output(&init_net, dst_ip, src_ip, 0, 0,
> + RT_SCOPE_UNIVERSE);
> if (IS_ERR(rt)) {
> DP_ERR(dev, "ip_route_output returned error\n");
> return -EINVAL;
> diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
> index 2c5ed0a7cb18..c9f0415f780a 100644
> --- a/drivers/net/bonding/bond_main.c
> +++ b/drivers/net/bonding/bond_main.c
> @@ -3014,8 +3014,8 @@ static void bond_arp_send_all(struct bonding *bond, struct slave *slave)
> tags = NULL;
>
> /* Find out through which dev should the packet go */
> - rt = ip_route_output(dev_net(bond->dev), targets[i], 0,
> - RTO_ONLINK, 0);
> + rt = ip_route_output(dev_net(bond->dev), targets[i], 0, 0, 0,
> + RT_SCOPE_LINK);
> if (IS_ERR(rt)) {
> /* there's no route to target - try to send arp
> * probe to generate any traffic (arp_validate=0)
> diff --git a/drivers/net/ethernet/broadcom/cnic.c b/drivers/net/ethernet/broadcom/cnic.c
> index 3d63177e7e52..c2b4188a1ef1 100644
> --- a/drivers/net/ethernet/broadcom/cnic.c
> +++ b/drivers/net/ethernet/broadcom/cnic.c
> @@ -3682,7 +3682,8 @@ static int cnic_get_v4_route(struct sockaddr_in *dst_addr,
> #if defined(CONFIG_INET)
> struct rtable *rt;
>
> - rt = ip_route_output(&init_net, dst_addr->sin_addr.s_addr, 0, 0, 0);
> + rt = ip_route_output(&init_net, dst_addr->sin_addr.s_addr, 0, 0, 0,
> + RT_SCOPE_UNIVERSE);
> if (!IS_ERR(rt)) {
> *dst = &rt->dst;
> return 0;
> diff --git a/include/net/route.h b/include/net/route.h
> index d4a0147942f1..315a8acee6c6 100644
> --- a/include/net/route.h
> +++ b/include/net/route.h
> @@ -141,15 +141,22 @@ static inline struct rtable *ip_route_output_key(struct net *net, struct flowi4
> return ip_route_output_flow(net, flp, NULL);
> }
>
> +/* Simplistic IPv4 route lookup function.
> + * This is only suitable for some particular use cases: since the flowi4
> + * structure is only partially set, it may bypass some fib-rules.
> + */
> static inline struct rtable *ip_route_output(struct net *net, __be32 daddr,
> - __be32 saddr, u8 tos, int oif)
> + __be32 saddr, u8 tos, int oif,
> + __u8 scope)
> {
> struct flowi4 fl4 = {
> .flowi4_oif = oif,
> .flowi4_tos = tos,
> + .flowi4_scope = scope,
> .daddr = daddr,
> .saddr = saddr,
> };
> +
> return ip_route_output_key(net, &fl4);
> }
>
> diff --git a/net/atm/clip.c b/net/atm/clip.c
> index 294cb9efe3d3..362e8d25a79e 100644
> --- a/net/atm/clip.c
> +++ b/net/atm/clip.c
> @@ -463,7 +463,7 @@ static int clip_setentry(struct atm_vcc *vcc, __be32 ip)
> unlink_clip_vcc(clip_vcc);
> return 0;
> }
> - rt = ip_route_output(&init_net, ip, 0, 1, 0);
> + rt = ip_route_output(&init_net, ip, 0, 0, 0, RT_SCOPE_LINK);
> if (IS_ERR(rt))
> return PTR_ERR(rt);
> neigh = __neigh_lookup(&arp_tbl, &ip, rt->dst.dev, 1);
> diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
> index 35e10c5a766d..4242447be322 100644
> --- a/net/bridge/br_netfilter_hooks.c
> +++ b/net/bridge/br_netfilter_hooks.c
> @@ -399,7 +399,8 @@ static int br_nf_pre_routing_finish(struct net *net, struct sock *sk, struct sk_
> goto free_skb;
>
> rt = ip_route_output(net, iph->daddr, 0,
> - RT_TOS(iph->tos), 0);
> + RT_TOS(iph->tos), 0,
> + RT_SCOPE_UNIVERSE);
> if (!IS_ERR(rt)) {
> /* - Bridged-and-DNAT'ed traffic doesn't
> * require ip_forwarding. */
> diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
> index 0d0d725b46ad..ab82ca104496 100644
> --- a/net/ipv4/arp.c
> +++ b/net/ipv4/arp.c
> @@ -456,7 +456,8 @@ static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev)
> /*unsigned long now; */
> struct net *net = dev_net(dev);
>
> - rt = ip_route_output(net, sip, tip, 0, l3mdev_master_ifindex_rcu(dev));
> + rt = ip_route_output(net, sip, tip, 0, l3mdev_master_ifindex_rcu(dev),
> + RT_SCOPE_UNIVERSE);
> if (IS_ERR(rt))
> return 1;
> if (rt->dst.dev != dev) {
> @@ -1056,7 +1057,8 @@ static int arp_req_set(struct net *net, struct arpreq *r,
> if (r->arp_flags & ATF_PERM)
> r->arp_flags |= ATF_COM;
> if (!dev) {
> - struct rtable *rt = ip_route_output(net, ip, 0, RTO_ONLINK, 0);
> + struct rtable *rt = ip_route_output(net, ip, 0, 0, 0,
> + RT_SCOPE_LINK);
>
> if (IS_ERR(rt))
> return PTR_ERR(rt);
> @@ -1188,7 +1190,8 @@ static int arp_req_delete(struct net *net, struct arpreq *r,
>
> ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr;
> if (!dev) {
> - struct rtable *rt = ip_route_output(net, ip, 0, RTO_ONLINK, 0);
> + struct rtable *rt = ip_route_output(net, ip, 0, 0, 0,
> + RT_SCOPE_LINK);
> if (IS_ERR(rt))
> return PTR_ERR(rt);
> dev = rt->dst.dev;
> diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
> index 717e97a389a8..9bf09de6a2e7 100644
> --- a/net/ipv4/igmp.c
> +++ b/net/ipv4/igmp.c
> @@ -1842,7 +1842,8 @@ static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr)
> if (!dev) {
> struct rtable *rt = ip_route_output(net,
> imr->imr_multiaddr.s_addr,
> - 0, 0, 0);
> + 0, 0, 0,
> + RT_SCOPE_UNIVERSE);
> if (!IS_ERR(rt)) {
> dev = rt->dst.dev;
> ip_rt_put(rt);
> diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
> index 6dab883a08dd..1303acb9cdd2 100644
> --- a/net/mpls/af_mpls.c
> +++ b/net/mpls/af_mpls.c
> @@ -594,7 +594,7 @@ static struct net_device *inet_fib_lookup_dev(struct net *net,
> struct in_addr daddr;
>
> memcpy(&daddr, addr, sizeof(struct in_addr));
> - rt = ip_route_output(net, daddr.s_addr, 0, 0, 0);
> + rt = ip_route_output(net, daddr.s_addr, 0, 0, 0, RT_SCOPE_UNIVERSE);
> if (IS_ERR(rt))
> return ERR_CAST(rt);
>
> --
> 2.39.2
>
Reviewed-by: Hangbin Liu <liuhangbin@...il.com>
Powered by blists - more mailing lists