lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20240630092308.0288083a@hermes.local>
Date: Sun, 30 Jun 2024 09:23:08 -0700
From: Stephen Hemminger <stephen@...workplumber.org>
To: Ido Schimmel <idosch@...sch.org>
Cc: "Muggeridge, Matt" <matt.muggeridge2@....com>, "netdev@...r.kernel.org"
 <netdev@...r.kernel.org>
Subject: Re: "ip route show dev enp0s9" does not show all routes for enp0s9

On Sun, 30 Jun 2024 13:39:35 +0300
Ido Schimmel <idosch@...sch.org> wrote:

> On Fri, Jun 28, 2024 at 02:54:58AM +0000, Muggeridge, Matt wrote:
> > > From: Stephen Hemminger <stephen@...workplumber.org>
> > > Sent: Friday, June 28, 2024 12:37 PM
> > > 
> > > On Fri, 28 Jun 2024 00:01:47 +0000
> > > "Muggeridge, Matt" <matt.muggeridge2@....com> wrote:
> > >   
> > > > Hi,
> > > >
> > > > > This looks like a problem in "iproute2".  This was observed on a fresh install
> > > > > of Ubuntu 24.04, with Linux 6.8.0-36-generic.
> > > >
> > > > > NOTE: I first raised this in
> > > > > https://bugs.launchpad.net/ubuntu/+source/iproute2/+bug/2070412, then
> > > > > later found https://github.com/iproute2/iproute2/blob/main/README.devel.
> > > >
> > > > * PROBLEM
> > > > Compare the outputs:
> > > >
> > > > $ ip -6 route show dev enp0s9
> > > > 2001:2:0:1000::/64 proto ra metric 1024 expires 65518sec pref medium
> > > > fe80::/64 proto kernel metric 256 pref medium
> > > >
> > > > $ ip -6 route
> > > > > 2001:2:0:1000::/64 dev enp0s9 proto ra metric 1024 expires 65525sec pref medium
> > > > > fe80::/64 dev enp0s3 proto kernel metric 256 pref medium
> > > > > fe80::/64 dev enp0s9 proto kernel metric 256 pref medium
> > > > > default proto ra metric 1024 expires 589sec pref medium
> > > > > 	nexthop via fe80::200:10ff:fe10:1060 dev enp0s9 weight 1
> > > > > 	nexthop via fe80::200:10ff:fe10:1061 dev enp0s9 weight 1
> > > >
> > > > > The default route is associated with enp0s9, yet the first command above
> > > > > does not show it.
> > > >
> > > > > FWIW, the two default route entries were created by two separate routers
> > > > > on the network, each sending their RA.
> > > >
> > > > * REPRODUCER
> > > > > Statically configure systemd-networkd with two route entries, similar to the
> > > > > following:
> > > >
> > > > $ networkctl cat 10-enp0s9.network
> > > > # /etc/systemd/network/10-enp0s9.network
> > > > [Match]
> > > > Name=enp0s9
> > > >
> > > > [Link]
> > > > RequiredForOnline=no
> > > >
> > > > [Network]
> > > > Description="Internal Network: Private VM-to-VM IPv6 interface"
> > > > DHCP=no
> > > > LLDP=no
> > > > EmitLLDP=no
> > > >
> > > >
> > > > # /etc/systemd/network/10-enp0s9.network.d/address.conf
> > > > [Network]
> > > > Address=2001:2:0:1000:a00:27ff:fe5f:f72d/64
> > > >
> > > >
> > > > # /etc/systemd/network/10-enp0s9.network.d/route-1060.conf
> > > > [Route]
> > > > Gateway=fe80::200:10ff:fe10:1060
> > > > GatewayOnLink=true
> > > >
> > > >
> > > > # /etc/systemd/network/10-enp0s9.network.d/route-1061.conf
> > > > [Route]
> > > > Gateway=fe80::200:10ff:fe10:1061
> > > > GatewayOnLink=true
> > > >
> > > >
> > > >
> > > > Now reload and reconfigure the interface and you will see two routes.
> > > >
> > > > $ networkctl reload
> > > > $ networkctl reconfigure enp0s9
> > > > $ ip -6 r
> > > > $ ip -6 r show dev enp0s9 # the routes are not shown
> > > >  
> > > 
> > > "Don't blame the messenger", the ip command only reports what the kernel
> > > sends. So it is likely a route semantics issue in the kernel.  
> > 
> > Thanks Stephen.
> > 
> > Ok, I have reported it on my distro in https://bugs.launchpad.net/ubuntu/+source/linux/+bug/2071406.
> > 
> > I guess the kernel netdev folks will see this thread and can comment too?  
> 
> The problem seems to be in iproute2 and not in the kernel. Both IPv4 and
> IPv6 will dump the route if at least one of the nexthop devices is the
> one specified by user space. You can see the routes in the strace output
> below.
> 
> ip link add name dummy1 up type dummy
> ip link add name dummy2 up type dummy
> ip address add 192.0.2.1/28 dev dummy1
> ip address add 192.0.2.17/28 dev dummy2
> ip address add 2001:db8:1::1/64 dev dummy1
> ip address add 2001:db8:2::1/64 dev dummy2
> ip route add 198.51.100.0/24 nexthop via 192.0.2.2 dev dummy1 nexthop via 192.0.2.18 dev dummy2
> ip route add 2001:db8:10::/64 nexthop via 2001:db8:1::2 dev dummy1 nexthop via 2001:db8:2::2 dev dummy2
> 
> # strace -e network ip -4 route show dev dummy1
> [...]
> recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=[[{nlmsg_len=60, nlmsg_type=RTM_NEWROUTE, nlmsg_flags=NLM_F_MULTI|NLM_F_DUMP_FILTERED, nlmsg_seq=1719737009, nlmsg_pid=704}, {rtm_family=AF_INET, rtm_dst_len=28, rtm_src_len=0, rtm_tos=0, rtm_table=RT_TABLE_MAIN, rtm_protocol=RTPROT_KERNEL, rtm_scope=RT_SCOPE_LINK, rtm_type=RTN_UNICAST, rtm_flags=0}, [[{nla_len=8, nla_type=RTA_TABLE}, RT_TABLE_MAIN], [{nla_len=8, nla_type=RTA_DST}, inet_addr("192.0.2.0")], [{nla_len=8, nla_type=RTA_PREFSRC}, inet_addr("192.0.2.1")], [{nla_len=8, nla_type=RTA_OIF}, if_nametoindex("dummy1")]]], [{nlmsg_len=80, nlmsg_type=RTM_NEWROUTE, nlmsg_flags=NLM_F_MULTI|NLM_F_DUMP_FILTERED, nlmsg_seq=1719737009, nlmsg_pid=704}, {rtm_family=AF_INET, rtm_dst_len=24, rtm_src_len=0, rtm_tos=0, rtm_table=RT_TABLE_MAIN, rtm_protocol=RTPROT_BOOT, rtm_scope=RT_SCOPE_UNIVERSE, rtm_type=RTN_UNICAST, rtm_flags=0}, [[{nla_len=8, nla_type=RTA_TABLE}, RT_TABLE_MAIN], 
 [{nla_len=8, nla_type=RTA_DST}, inet_addr("198.51.100.0")], [{nla_len=36, nla_type=RTA_MULTIPATH}, [[{rtnh_len=16, rtnh_flags=0, rtnh_hops=0, rtnh_ifindex=if_nametoindex("dummy1")}, [{nla_len=8, nla_type=RTA_GATEWAY}, inet_addr("192.0.2.2")]], [{rtnh_len=16, rtnh_flags=0, rtnh_hops=0, rtnh_ifindex=if_nametoindex("dummy2")}, [{nla_len=8, nla_type=RTA_GATEWAY}, inet_addr("192.0.2.18")]]]]]]], iov_len=32768}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 140
> 
> # strace -e network ip -6 route show dev dummy1
> [...]
> recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=[[{nlmsg_len=116, nlmsg_type=RTM_NEWROUTE, nlmsg_flags=NLM_F_MULTI|NLM_F_DUMP_FILTERED, nlmsg_seq=1719737009, nlmsg_pid=708}, {rtm_family=AF_INET6, rtm_dst_len=64, rtm_src_len=0, rtm_tos=0, rtm_table=RT_TABLE_MAIN, rtm_protocol=RTPROT_KERNEL, rtm_scope=RT_SCOPE_UNIVERSE, rtm_type=RTN_UNICAST, rtm_flags=0}, [[{nla_len=8, nla_type=RTA_TABLE}, RT_TABLE_MAIN], [{nla_len=20, nla_type=RTA_DST}, inet_pton(AF_INET6, "2001:db8:1::")], [{nla_len=8, nla_type=RTA_PRIORITY}, 256], [{nla_len=8, nla_type=RTA_OIF}, if_nametoindex("dummy1")], [{nla_len=36, nla_type=RTA_CACHEINFO}, {rta_clntref=0, rta_lastuse=0, rta_expires=0, rta_error=0, rta_used=0, rta_id=0, rta_ts=0, rta_tsage=0}], [{nla_len=5, nla_type=RTA_PREF}, 0]]], [{nlmsg_len=168, nlmsg_type=RTM_NEWROUTE, nlmsg_flags=NLM_F_MULTI|NLM_F_DUMP_FILTERED, nlmsg_seq=1719737009, nlmsg_pid=708}, {rtm_family=AF_INET6, rtm_dst_len=64, rtm_src
 _len=0, rtm_tos=0, rtm_table=RT_TABLE_MAIN, rtm_protocol=RTPROT_BOOT, rtm_scope=RT_SCOPE_UNIVERSE, rtm_type=RTN_UNICAST, rtm_flags=0}, [[{nla_len=8, nla_type=RTA_TABLE}, RT_TABLE_MAIN], [{nla_len=20, nla_type=RTA_DST}, inet_pton(AF_INET6, "2001:db8:10::")], [{nla_len=8, nla_type=RTA_PRIORITY}, 1024], [{nla_len=60, nla_type=RTA_MULTIPATH}, [[{rtnh_len=28, rtnh_flags=0, rtnh_hops=0, rtnh_ifindex=if_nametoindex("dummy1")}, [{nla_len=20, nla_type=RTA_GATEWAY}, inet_pton(AF_INET6, "2001:db8:1::2")]], [{rtnh_len=28, rtnh_flags=0, rtnh_hops=0, rtnh_ifindex=if_nametoindex("dummy2")}, [{nla_len=20, nla_type=RTA_GATEWAY}, inet_pton(AF_INET6, "2001:db8:2::2")]]]], [{nla_len=36, nla_type=RTA_CACHEINFO}, {rta_clntref=0, rta_lastuse=0, rta_expires=0, rta_error=0, rta_used=0, rta_id=0, rta_ts=0, rta_tsage=0}], [{nla_len=5, nla_type=RTA_PREF}, 0]]], [{nlmsg_len=116, nlmsg_type=RTM_NEWROUTE, nlmsg_flags=NLM_F_MULTI|NLM_F_DUMP_FILTERED, nlmsg_seq=1719737009, nlmsg_pid=708}, {rtm_family=AF_INET6, rtm_
 dst_len=64, rtm_src_len=0, rtm_tos=0, rtm_table=RT_TABLE_MAIN, rtm_protocol=RTPROT_KERNEL, rtm_scope=RT_SCOPE_UNIVERSE, rtm_type=RTN_UNICAST, rtm_flags=0}, [[{nla_len=8, nla_type=RTA_TABLE}, RT_TABLE_MAIN], [{nla_len=20, nla_type=RTA_DST}, inet_pton(AF_INET6, "fe80::")], [{nla_len=8, nla_type=RTA_PRIORITY}, 256], [{nla_len=8, nla_type=RTA_OIF}, if_nametoindex("dummy1")], [{nla_len=36, nla_type=RTA_CACHEINFO}, {rta_clntref=0, rta_lastuse=0, rta_expires=0, rta_error=0, rta_used=0, rta_id=0, rta_ts=0, rta_tsage=0}], [{nla_len=5, nla_type=RTA_PREF}, 0]]]], iov_len=32768}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 400
> 
> Following patch works for me [1], but it is missing support for
> RTA_GATEWAY which is also present in the RTA_MULTIPATH nest.
> 
> [1]
> diff --git a/ip/iproute.c b/ip/iproute.c
> index b53046116826..3999853a1455 100644
> --- a/ip/iproute.c
> +++ b/ip/iproute.c
> @@ -310,12 +310,28 @@ static int filter_nlmsg(struct nlmsghdr *n, struct rtattr **tb, int host_len)
>  			return 0;
>  	}
>  	if (filter.oifmask) {
> -		int oif = 0;
> +		if (tb[RTA_OIF]) {
> +			int oif = rta_getattr_u32(tb[RTA_OIF]);
>  
> -		if (tb[RTA_OIF])
> -			oif = rta_getattr_u32(tb[RTA_OIF]);
> -		if ((oif^filter.oif)&filter.oifmask)
> -			return 0;
> +			if ((oif ^ filter.oif) & filter.oifmask)
> +				return 0;
> +		} else if (tb[RTA_MULTIPATH]) {
> +			const struct rtnexthop *nh = RTA_DATA(tb[RTA_MULTIPATH]);
> +			int len = RTA_PAYLOAD(tb[RTA_MULTIPATH]);
> +			bool dev_match = false;
> +
> +			while (len >= sizeof(*nh)) {
> +				if (nh->rtnh_ifindex == filter.oif) {
> +					dev_match = true;
> +					break;
> +				}
> +
> +				len -= NLMSG_ALIGN(nh->rtnh_len);
> +				nh = RTNH_NEXT(nh);
> +			}
> +			if (!dev_match)
> +				return 0;
> +		}
>  	}
>  	if (filter.markmask) {
>  		int mark = 0;


Good catch, original code did not handle multipath in filtering.

Suggest moving the loop into a helper function for clarity.

diff --git a/ip/iproute.c b/ip/iproute.c
index b5304611..44666240 100644
--- a/ip/iproute.c
+++ b/ip/iproute.c
@@ -154,6 +154,24 @@ static int flush_update(void)
 	return 0;
 }
 
+static bool filter_multipath(const struct rtattr *rta)
+{
+	const struct rtnexthop *nh = RTA_DATA(rta);
+	int len = RTA_PAYLOAD(rta);
+
+	while (len >= sizeof(*nh)) {
+		if (nh->rtnh_len > len)
+			break;
+
+		if (!((nh->rtnh_ifindex ^ filter.oif) & filter.oifmask))
+			return true;
+
+		len -= NLMSG_ALIGN(nh->rtnh_len);
+		nh = RTNH_NEXT(nh);
+	}
+	return false;
+}
+
 static int filter_nlmsg(struct nlmsghdr *n, struct rtattr **tb, int host_len)
 {
 	struct rtmsg *r = NLMSG_DATA(n);
@@ -310,12 +328,15 @@ static int filter_nlmsg(struct nlmsghdr *n, struct rtattr **tb, int host_len)
 			return 0;
 	}
 	if (filter.oifmask) {
-		int oif = 0;
+		if (tb[RTA_OIF]) {
+			int oif = rta_getattr_u32(tb[RTA_OIF]);
 
-		if (tb[RTA_OIF])
-			oif = rta_getattr_u32(tb[RTA_OIF]);
-		if ((oif^filter.oif)&filter.oifmask)
-			return 0;
+			if ((oif ^ filter.oif) & filter.oifmask)
+				return 0;
+		} else if (tb[RTA_MULTIPATH]) {
+			if (!filter_multipath(tb[RTA_MULTIPATH]))
+				return 0;
+		}
 	}
 	if (filter.markmask) {
 		int mark = 0;


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ