lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <aF6tpb4EQaxZ2XAw@shredder>
Date: Fri, 27 Jun 2025 17:41:41 +0300
From: Ido Schimmel <idosch@...sch.org>
To: "dongchenchen (A)" <dongchenchen2@...wei.com>
Cc: Jakub Kicinski <kuba@...nel.org>, davem@...emloft.net,
	edumazet@...gle.com, pabeni@...hat.com, horms@...nel.org,
	jiri@...nulli.us, oscmaes92@...il.com, linux@...blig.org,
	pedro.netdev@...devamos.com, netdev@...r.kernel.org,
	linux-kernel@...r.kernel.org, yuehaibing@...wei.com,
	zhangchangzhong@...wei.com
Subject: Re: [PATCH net] net: vlan: fix VLAN 0 refcount imbalance of toggling
 filtering during runtime

On Thu, Jun 26, 2025 at 11:41:45AM +0800, dongchenchen (A) wrote:
> maybe we can fix it by:
> 
> diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
> index 6e01ece0a95c..262f8d3f06ef 100644
> --- a/net/8021q/vlan.c
> +++ b/net/8021q/vlan.c
> @@ -378,14 +378,18 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
>              return notifier_from_errno(err);
>      }
> -    if ((event == NETDEV_UP) &&
> -        (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)) {
> +    if (((event == NETDEV_UP) &&
> +        (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)) ||
> +        (event == NETDEV_CVLAN_FILTER_PUSH_INFO &&
> +        (dev->flags & IFF_UP))) {
>          pr_info("adding VLAN 0 to HW filter on device %s\n",
>              dev->name);
>          vlan_vid_add(dev, htons(ETH_P_8021Q), 0);
>      }
> -    if (event == NETDEV_DOWN &&
> -        (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER))
> +    if ((event == NETDEV_DOWN &&
> +        (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)) ||
> +        (event == NETDEV_CVLAN_FILTER_DROP_INFO &&
> +        (dev->flags & IFF_UP)))
>          vlan_vid_del(dev, htons(ETH_P_8021Q), 0);
>     vlan_info = rtnl_dereference(dev->vlan_info);

As I understand it, there are two issues:

1. VID 0 is deleted when it shouldn't. This leads to the crashes you
shared.

2. VID 0 is not deleted when it should. This leads to memory leaks like
[1] with a reproducer such as [2].

AFAICT, your proposed patch solves the second issue, but only partially
addresses the first issue. The automatic addition of VID 0 is assumed to
be successful, but it can fail due to hardware issues or memory
allocation failures. I realize it is not common, but it can happen. If
you annotate __vlan_vid_add() [3] and inject failures [4], you will see
that the crashes still happen with your patch.

WDYT about something like [5]? Basically, noting in the VLAN info
whether VID 0 was automatically added upon NETDEV_UP and based on that
decide whether it should be deleted upon NETDEV_DOWN, regardless of
"rx-vlan-filter".

[1]
unreferenced object 0xffff888007468a00 (size 256):
  comm "ip", pid 386, jiffies 4294820761
  hex dump (first 32 bytes):
    00 20 26 0a 80 88 ff ff 00 00 00 00 00 00 00 00  . &.............
    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
  backtrace (crc 43031ab1):
    __kmalloc_cache_noprof+0x2b5/0x340
    vlan_vid_add+0x434/0x940
    vlan_device_event.cold+0x75/0xa8
    notifier_call_chain+0xca/0x150
    __dev_notify_flags+0xe3/0x250
    rtnl_configure_link+0x193/0x260
    rtnl_newlink_create+0x383/0x8e0
    __rtnl_newlink+0x22c/0xa40
    rtnl_newlink+0x627/0xb00
    rtnetlink_rcv_msg+0x6fb/0xb70
    netlink_rcv_skb+0x11f/0x350
    netlink_unicast+0x426/0x710
    netlink_sendmsg+0x75a/0xc20
    __sock_sendmsg+0xc1/0x150
    ____sys_sendmsg+0x5aa/0x7b0
    ___sys_sendmsg+0xfc/0x180
unreferenced object 0xffff888002fc3aa0 (size 32):
  comm "ip", pid 386, jiffies 4294820761
  hex dump (first 32 bytes):
    a0 8a 46 07 80 88 ff ff a0 8a 46 07 80 88 ff ff  ..F.......F.....
    81 00 00 00 01 00 00 00 cc cc cc cc cc cc cc cc  ................
  backtrace (crc c2f2186c):
    __kmalloc_cache_noprof+0x2b5/0x340
    vlan_vid_add+0x25a/0x940
    vlan_device_event.cold+0x75/0xa8
    notifier_call_chain+0xca/0x150
    __dev_notify_flags+0xe3/0x250
    rtnl_configure_link+0x193/0x260
    rtnl_newlink_create+0x383/0x8e0
    __rtnl_newlink+0x22c/0xa40
    rtnl_newlink+0x627/0xb00
    rtnetlink_rcv_msg+0x6fb/0xb70
    netlink_rcv_skb+0x11f/0x350
    netlink_unicast+0x426/0x710
    netlink_sendmsg+0x75a/0xc20
    __sock_sendmsg+0xc1/0x150
    ____sys_sendmsg+0x5aa/0x7b0
    ___sys_sendmsg+0xfc/0x180

[2]
#!/bin/bash

ip link add bond1 up type bond mode 0
ethtool -K bond1 rx-vlan-filter off
ip link del dev bond1

[3]
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 9404dd551dfd..6dc25c4877f2 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -314,6 +314,7 @@ static int __vlan_vid_add(struct vlan_info *vlan_info, __be16 proto, u16 vid,
 	*pvid_info = vid_info;
 	return 0;
 }
+ALLOW_ERROR_INJECTION(__vlan_vid_add, ERRNO);
 
 int vlan_vid_add(struct net_device *dev, __be16 proto, u16 vid)
 {

[4]
#!/bin/bash

echo __vlan_vid_add > /sys/kernel/debug/fail_function/inject
printf %#x -12 > /sys/kernel/debug/fail_function/__vlan_vid_add/retval
echo 100 > /sys/kernel/debug/fail_function/probability
echo 1 > /sys/kernel/debug/fail_function/times
echo 1 > /sys/kernel/debug/fail_function/verbose
ip link add bond1 up type bond mode 0
ip link add link bond1 name vlan0 type vlan id 0 protocol 802.1q
ip link set dev bond1 down
ip link del vlan0
ip link del dev bond1

[5]
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 06908e37c3d9..9a6df8c1daf9 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -357,6 +357,35 @@ static int __vlan_device_event(struct net_device *dev, unsigned long event)
 	return err;
 }
 
+static void vlan_vid0_add(struct net_device *dev)
+{
+	struct vlan_info *vlan_info;
+	int err;
+
+	if (!(dev->features & NETIF_F_HW_VLAN_CTAG_FILTER))
+		return;
+
+	pr_info("adding VLAN 0 to HW filter on device %s\n", dev->name);
+
+	err = vlan_vid_add(dev, htons(ETH_P_8021Q), 0);
+	if (err)
+		return;
+
+	vlan_info = rtnl_dereference(dev->vlan_info);
+	vlan_info->auto_vid0 = true;
+}
+
+static void vlan_vid0_del(struct net_device *dev)
+{
+	struct vlan_info *vlan_info = rtnl_dereference(dev->vlan_info);
+
+	if (!vlan_info || !vlan_info->auto_vid0)
+		return;
+
+	vlan_info->auto_vid0 = false;
+	vlan_vid_del(dev, htons(ETH_P_8021Q), 0);
+}
+
 static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 			     void *ptr)
 {
@@ -378,15 +407,10 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 			return notifier_from_errno(err);
 	}
 
-	if ((event == NETDEV_UP) &&
-	    (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)) {
-		pr_info("adding VLAN 0 to HW filter on device %s\n",
-			dev->name);
-		vlan_vid_add(dev, htons(ETH_P_8021Q), 0);
-	}
-	if (event == NETDEV_DOWN &&
-	    (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER))
-		vlan_vid_del(dev, htons(ETH_P_8021Q), 0);
+	if (event == NETDEV_UP)
+		vlan_vid0_add(dev);
+	else if (event == NETDEV_DOWN)
+		vlan_vid0_del(dev);
 
 	vlan_info = rtnl_dereference(dev->vlan_info);
 	if (!vlan_info)
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index 5eaf38875554..c7ffe591d593 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -33,6 +33,7 @@ struct vlan_info {
 	struct vlan_group	grp;
 	struct list_head	vid_list;
 	unsigned int		nr_vids;
+	bool			auto_vid0;
 	struct rcu_head		rcu;
 };

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ