lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <ZoE15-y0wMhzQEYg@shredder.mtl.com>
Date: Sun, 30 Jun 2024 13:39:35 +0300
From: Ido Schimmel <idosch@...sch.org>
To: "Muggeridge, Matt" <matt.muggeridge2@....com>
Cc: Stephen Hemminger <stephen@...workplumber.org>,
	"netdev@...r.kernel.org" <netdev@...r.kernel.org>
Subject: Re: "ip route show dev enp0s9" does not show all routes for enp0s9

On Fri, Jun 28, 2024 at 02:54:58AM +0000, Muggeridge, Matt wrote:
> > From: Stephen Hemminger <stephen@...workplumber.org>
> > Sent: Friday, June 28, 2024 12:37 PM
> > 
> > On Fri, 28 Jun 2024 00:01:47 +0000
> > "Muggeridge, Matt" <matt.muggeridge2@....com> wrote:
> > 
> > > Hi,
> > >
> > > This looks like a problem in "iproute2".  This was observed on a fresh install
> > of Ubuntu 24.04, with Linux 6.8.0-36-generic.
> > >
> > > NOTE: I first raised this in
> > https://bugs.launchpad.net/ubuntu/+source/iproute2/+bug/2070412, then
> > later found https://github.com/iproute2/iproute2/blob/main/README.devel.
> > >
> > > * PROBLEM
> > > Compare the outputs:
> > >
> > > $ ip -6 route show dev enp0s9
> > > 2001:2:0:1000::/64 proto ra metric 1024 expires 65518sec pref medium
> > > fe80::/64 proto kernel metric 256 pref medium
> > >
> > > $ ip -6 route
> > > 2001:2:0:1000::/64 dev enp0s9 proto ra metric 1024 expires 65525sec
> > > pref medium
> > > fe80::/64 dev enp0s3 proto kernel metric 256 pref medium
> > > fe80::/64 dev enp0s9 proto kernel metric 256 pref medium default proto
> > > ra metric 1024 expires 589sec pref medium  nexthop via
> > > fe80::200:10ff:fe10:1060 dev enp0s9 weight 1  nexthop via
> > > fe80::200:10ff:fe10:1061 dev enp0s9 weight 1
> > >
> > > The default route is associated with enp0s9, yet the first command above
> > does not show it.
> > >
> > > FWIW, the two default route entries were created by two separate routers
> > on the network, each sending their RA.
> > >
> > > * REPRODUCER
> > > Statically Configure systemd-networkd with two route entries, similar to the
> > following:
> > >
> > > $ networkctl cat 10-enp0s9.network
> > > # /etc/systemd/network/10-enp0s9.network
> > > [Match]
> > > Name=enp0s9
> > >
> > > [Link]
> > > RequiredForOnline=no
> > >
> > > [Network]
> > > Description="Internal Network: Private VM-to-VM IPv6 interface"
> > > DHCP=no
> > > LLDP=no
> > > EmitLLDP=no
> > >
> > >
> > > # /etc/systemd/network/10-enp0s9.network.d/address.conf
> > > [Network]
> > > Address=2001:2:0:1000:a00:27ff:fe5f:f72d/64
> > >
> > >
> > > # /etc/systemd/network/10-enp0s9.network.d/route-1060.conf
> > > [Route]
> > > Gateway=fe80::200:10ff:fe10:1060
> > > GatewayOnLink=true
> > >
> > >
> > > # /etc/systemd/network/10-enp0s9.network.d/route-1061.conf
> > > [Route]
> > > Gateway=fe80::200:10ff:fe10:1061
> > > GatewayOnLink=true
> > >
> > >
> > >
> > > Now reload and reconfigure the interface and you will see two routes.
> > >
> > > $ networkctl reload
> > > $ networkctl reconfigure enp0s9
> > > $ ip -6 r
> > > $ ip -6 r show dev enp0s9 # the routes are not shown
> > >
> > 
> > "Don't blame the messenger", the ip command only reports what the kernel
> > sends. So it is likely a route semantics issue in the kernel.
> 
> Thanks Stephen.
> 
> Ok, I have reported it on my distro in https://bugs.launchpad.net/ubuntu/+source/linux/+bug/2071406.
> 
> I guess the kernel netdev folks will see this thread and can comment too?

The problem seems to be in iproute2 and not in the kernel. For both IPv4
and IPv6, the kernel will dump a multipath route if at least one of its
nexthop devices is the one specified by user space. You can see the
routes in the strace output below.

ip link add name dummy1 up type dummy
ip link add name dummy2 up type dummy
ip address add 192.0.2.1/28 dev dummy1
ip address add 192.0.2.17/28 dev dummy2
ip address add 2001:db8:1::1/64 dev dummy1
ip address add 2001:db8:2::1/64 dev dummy2
ip route add 198.51.100.0/24 nexthop via 192.0.2.2 dev dummy1 nexthop via 192.0.2.18 dev dummy2
ip route add 2001:db8:10::/64 nexthop via 2001:db8:1::2 dev dummy1 nexthop via 2001:db8:2::2 dev dummy2

# strace -e network ip -4 route show dev dummy1
[...]
recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=[[{nlmsg_len=60, nlmsg_type=RTM_NEWROUTE, nlmsg_flags=NLM_F_MULTI|NLM_F_DUMP_FILTERED, nlmsg_seq=1719737009, nlmsg_pid=704}, {rtm_family=AF_INET, rtm_dst_len=28, rtm_src_len=0, rtm_tos=0, rtm_table=RT_TABLE_MAIN, rtm_protocol=RTPROT_KERNEL, rtm_scope=RT_SCOPE_LINK, rtm_type=RTN_UNICAST, rtm_flags=0}, [[{nla_len=8, nla_type=RTA_TABLE}, RT_TABLE_MAIN], [{nla_len=8, nla_type=RTA_DST}, inet_addr("192.0.2.0")], [{nla_len=8, nla_type=RTA_PREFSRC}, inet_addr("192.0.2.1")], [{nla_len=8, nla_type=RTA_OIF}, if_nametoindex("dummy1")]]], [{nlmsg_len=80, nlmsg_type=RTM_NEWROUTE, nlmsg_flags=NLM_F_MULTI|NLM_F_DUMP_FILTERED, nlmsg_seq=1719737009, nlmsg_pid=704}, {rtm_family=AF_INET, rtm_dst_len=24, rtm_src_len=0, rtm_tos=0, rtm_table=RT_TABLE_MAIN, rtm_protocol=RTPROT_BOOT, rtm_scope=RT_SCOPE_UNIVERSE, rtm_type=RTN_UNICAST, rtm_flags=0}, [[{nla_len=8, nla_type=RTA_TABLE}, RT_TABLE_MAIN], [{nla_len=8, nla_type=RTA_DST}, inet_addr("198.51.100.0")], [{nla_len=36, nla_type=RTA_MULTIPATH}, [[{rtnh_len=16, rtnh_flags=0, rtnh_hops=0, rtnh_ifindex=if_nametoindex("dummy1")}, [{nla_len=8, nla_type=RTA_GATEWAY}, inet_addr("192.0.2.2")]], [{rtnh_len=16, rtnh_flags=0, rtnh_hops=0, rtnh_ifindex=if_nametoindex("dummy2")}, [{nla_len=8, nla_type=RTA_GATEWAY}, inet_addr("192.0.2.18")]]]]]]], iov_len=32768}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 140

# strace -e network ip -6 route show dev dummy1
[...]
recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=[[{nlmsg_len=116, nlmsg_type=RTM_NEWROUTE, nlmsg_flags=NLM_F_MULTI|NLM_F_DUMP_FILTERED, nlmsg_seq=1719737009, nlmsg_pid=708}, {rtm_family=AF_INET6, rtm_dst_len=64, rtm_src_len=0, rtm_tos=0, rtm_table=RT_TABLE_MAIN, rtm_protocol=RTPROT_KERNEL, rtm_scope=RT_SCOPE_UNIVERSE, rtm_type=RTN_UNICAST, rtm_flags=0}, [[{nla_len=8, nla_type=RTA_TABLE}, RT_TABLE_MAIN], [{nla_len=20, nla_type=RTA_DST}, inet_pton(AF_INET6, "2001:db8:1::")], [{nla_len=8, nla_type=RTA_PRIORITY}, 256], [{nla_len=8, nla_type=RTA_OIF}, if_nametoindex("dummy1")], [{nla_len=36, nla_type=RTA_CACHEINFO}, {rta_clntref=0, rta_lastuse=0, rta_expires=0, rta_error=0, rta_used=0, rta_id=0, rta_ts=0, rta_tsage=0}], [{nla_len=5, nla_type=RTA_PREF}, 0]]], [{nlmsg_len=168, nlmsg_type=RTM_NEWROUTE, nlmsg_flags=NLM_F_MULTI|NLM_F_DUMP_FILTERED, nlmsg_seq=1719737009, nlmsg_pid=708}, {rtm_family=AF_INET6, rtm_dst_len=64, rtm_src_len=0, rtm_tos=0, rtm_table=RT_TABLE_MAIN, rtm_protocol=RTPROT_BOOT, rtm_scope=RT_SCOPE_UNIVERSE, rtm_type=RTN_UNICAST, rtm_flags=0}, [[{nla_len=8, nla_type=RTA_TABLE}, RT_TABLE_MAIN], [{nla_len=20, nla_type=RTA_DST}, inet_pton(AF_INET6, "2001:db8:10::")], [{nla_len=8, nla_type=RTA_PRIORITY}, 1024], [{nla_len=60, nla_type=RTA_MULTIPATH}, [[{rtnh_len=28, rtnh_flags=0, rtnh_hops=0, rtnh_ifindex=if_nametoindex("dummy1")}, [{nla_len=20, nla_type=RTA_GATEWAY}, inet_pton(AF_INET6, "2001:db8:1::2")]], [{rtnh_len=28, rtnh_flags=0, rtnh_hops=0, rtnh_ifindex=if_nametoindex("dummy2")}, [{nla_len=20, nla_type=RTA_GATEWAY}, inet_pton(AF_INET6, "2001:db8:2::2")]]]], [{nla_len=36, nla_type=RTA_CACHEINFO}, {rta_clntref=0, rta_lastuse=0, rta_expires=0, rta_error=0, rta_used=0, rta_id=0, rta_ts=0, rta_tsage=0}], [{nla_len=5, nla_type=RTA_PREF}, 0]]], [{nlmsg_len=116, nlmsg_type=RTM_NEWROUTE, nlmsg_flags=NLM_F_MULTI|NLM_F_DUMP_FILTERED, nlmsg_seq=1719737009, nlmsg_pid=708}, {rtm_family=AF_INET6, 
rtm_dst_len=64, rtm_src_len=0, rtm_tos=0, rtm_table=RT_TABLE_MAIN, rtm_protocol=RTPROT_KERNEL, rtm_scope=RT_SCOPE_UNIVERSE, rtm_type=RTN_UNICAST, rtm_flags=0}, [[{nla_len=8, nla_type=RTA_TABLE}, RT_TABLE_MAIN], [{nla_len=20, nla_type=RTA_DST}, inet_pton(AF_INET6, "fe80::")], [{nla_len=8, nla_type=RTA_PRIORITY}, 256], [{nla_len=8, nla_type=RTA_OIF}, if_nametoindex("dummy1")], [{nla_len=36, nla_type=RTA_CACHEINFO}, {rta_clntref=0, rta_lastuse=0, rta_expires=0, rta_error=0, rta_used=0, rta_id=0, rta_ts=0, rta_tsage=0}], [{nla_len=5, nla_type=RTA_PREF}, 0]]]], iov_len=32768}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 400

The following patch [1] works for me, but it is missing support for
RTA_GATEWAY, which is also present in the RTA_MULTIPATH nest.

[1]
diff --git a/ip/iproute.c b/ip/iproute.c
index b53046116826..3999853a1455 100644
--- a/ip/iproute.c
+++ b/ip/iproute.c
@@ -310,12 +310,28 @@ static int filter_nlmsg(struct nlmsghdr *n, struct rtattr **tb, int host_len)
 			return 0;
 	}
 	if (filter.oifmask) {
-		int oif = 0;
+		if (tb[RTA_OIF]) {
+			int oif = rta_getattr_u32(tb[RTA_OIF]);
 
-		if (tb[RTA_OIF])
-			oif = rta_getattr_u32(tb[RTA_OIF]);
-		if ((oif^filter.oif)&filter.oifmask)
-			return 0;
+			if ((oif ^ filter.oif) & filter.oifmask)
+				return 0;
+		} else if (tb[RTA_MULTIPATH]) {
+			const struct rtnexthop *nh = RTA_DATA(tb[RTA_MULTIPATH]);
+			int len = RTA_PAYLOAD(tb[RTA_MULTIPATH]);
+			bool dev_match = false;
+
+			while (len >= sizeof(*nh)) {
+				if (nh->rtnh_ifindex == filter.oif) {
+					dev_match = true;
+					break;
+				}
+
+				len -= NLMSG_ALIGN(nh->rtnh_len);
+				nh = RTNH_NEXT(nh);
+			}
+			if (!dev_match)
+				return 0;
+		}
 	}
 	if (filter.markmask) {
 		int mark = 0;

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ