[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <aF6tpb4EQaxZ2XAw@shredder>
Date: Fri, 27 Jun 2025 17:41:41 +0300
From: Ido Schimmel <idosch@...sch.org>
To: "dongchenchen (A)" <dongchenchen2@...wei.com>
Cc: Jakub Kicinski <kuba@...nel.org>, davem@...emloft.net,
edumazet@...gle.com, pabeni@...hat.com, horms@...nel.org,
jiri@...nulli.us, oscmaes92@...il.com, linux@...blig.org,
pedro.netdev@...devamos.com, netdev@...r.kernel.org,
linux-kernel@...r.kernel.org, yuehaibing@...wei.com,
zhangchangzhong@...wei.com
Subject: Re: [PATCH net] net: vlan: fix VLAN 0 refcount imbalance of toggling
filtering during runtime
On Thu, Jun 26, 2025 at 11:41:45AM +0800, dongchenchen (A) wrote:
> maybe we can fix it by:
>
> diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
> index 6e01ece0a95c..262f8d3f06ef 100644
> --- a/net/8021q/vlan.c
> +++ b/net/8021q/vlan.c
> @@ -378,14 +378,18 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
> return notifier_from_errno(err);
> }
> - if ((event == NETDEV_UP) &&
> - (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)) {
> + if (((event == NETDEV_UP) &&
> + (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)) ||
> + (event == NETDEV_CVLAN_FILTER_PUSH_INFO &&
> + (dev->flags & IFF_UP))) {
> pr_info("adding VLAN 0 to HW filter on device %s\n",
> dev->name);
> vlan_vid_add(dev, htons(ETH_P_8021Q), 0);
> }
> - if (event == NETDEV_DOWN &&
> - (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER))
> + if ((event == NETDEV_DOWN &&
> + (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)) ||
> + (event == NETDEV_CVLAN_FILTER_DROP_INFO &&
> + (dev->flags & IFF_UP)))
> vlan_vid_del(dev, htons(ETH_P_8021Q), 0);
> vlan_info = rtnl_dereference(dev->vlan_info);
As I understand it, there are two issues:
1. VID 0 is deleted when it shouldn't. This leads to the crashes you
shared.
2. VID 0 is not deleted when it should. This leads to memory leaks like
[1] with a reproducer such as [2].
AFAICT, your proposed patch solves the second issue, but only partially
addresses the first issue. The automatic addition of VID 0 is assumed to
be successful, but it can fail due to hardware issues or memory
allocation failures. I realize it is not common, but it can happen. If
you annotate __vlan_vid_add() [3] and inject failures [4], you will see
that the crashes still happen with your patch.
WDYT about something like [5]? Basically, noting in the VLAN info
whether VID 0 was automatically added upon NETDEV_UP and based on that
decide whether it should be deleted upon NETDEV_DOWN, regardless of
"rx-vlan-filter".
[1]
unreferenced object 0xffff888007468a00 (size 256):
comm "ip", pid 386, jiffies 4294820761
hex dump (first 32 bytes):
00 20 26 0a 80 88 ff ff 00 00 00 00 00 00 00 00 . &.............
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
backtrace (crc 43031ab1):
__kmalloc_cache_noprof+0x2b5/0x340
vlan_vid_add+0x434/0x940
vlan_device_event.cold+0x75/0xa8
notifier_call_chain+0xca/0x150
__dev_notify_flags+0xe3/0x250
rtnl_configure_link+0x193/0x260
rtnl_newlink_create+0x383/0x8e0
__rtnl_newlink+0x22c/0xa40
rtnl_newlink+0x627/0xb00
rtnetlink_rcv_msg+0x6fb/0xb70
netlink_rcv_skb+0x11f/0x350
netlink_unicast+0x426/0x710
netlink_sendmsg+0x75a/0xc20
__sock_sendmsg+0xc1/0x150
____sys_sendmsg+0x5aa/0x7b0
___sys_sendmsg+0xfc/0x180
unreferenced object 0xffff888002fc3aa0 (size 32):
comm "ip", pid 386, jiffies 4294820761
hex dump (first 32 bytes):
a0 8a 46 07 80 88 ff ff a0 8a 46 07 80 88 ff ff ..F.......F.....
81 00 00 00 01 00 00 00 cc cc cc cc cc cc cc cc ................
backtrace (crc c2f2186c):
__kmalloc_cache_noprof+0x2b5/0x340
vlan_vid_add+0x25a/0x940
vlan_device_event.cold+0x75/0xa8
notifier_call_chain+0xca/0x150
__dev_notify_flags+0xe3/0x250
rtnl_configure_link+0x193/0x260
rtnl_newlink_create+0x383/0x8e0
__rtnl_newlink+0x22c/0xa40
rtnl_newlink+0x627/0xb00
rtnetlink_rcv_msg+0x6fb/0xb70
netlink_rcv_skb+0x11f/0x350
netlink_unicast+0x426/0x710
netlink_sendmsg+0x75a/0xc20
__sock_sendmsg+0xc1/0x150
____sys_sendmsg+0x5aa/0x7b0
___sys_sendmsg+0xfc/0x180
[2]
#!/bin/bash
ip link add bond1 up type bond mode 0
ethtool -K bond1 rx-vlan-filter off
ip link del dev bond1
[3]
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 9404dd551dfd..6dc25c4877f2 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -314,6 +314,7 @@ static int __vlan_vid_add(struct vlan_info *vlan_info, __be16 proto, u16 vid,
*pvid_info = vid_info;
return 0;
}
+ALLOW_ERROR_INJECTION(__vlan_vid_add, ERRNO);
int vlan_vid_add(struct net_device *dev, __be16 proto, u16 vid)
{
[4]
#!/bin/bash
echo __vlan_vid_add > /sys/kernel/debug/fail_function/inject
printf %#x -12 > /sys/kernel/debug/fail_function/__vlan_vid_add/retval
echo 100 > /sys/kernel/debug/fail_function/probability
echo 1 > /sys/kernel/debug/fail_function/times
echo 1 > /sys/kernel/debug/fail_function/verbose
ip link add bond1 up type bond mode 0
ip link add link bond1 name vlan0 type vlan id 0 protocol 802.1q
ip link set dev bond1 down
ip link del vlan0
ip link del dev bond1
[5]
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 06908e37c3d9..9a6df8c1daf9 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -357,6 +357,35 @@ static int __vlan_device_event(struct net_device *dev, unsigned long event)
return err;
}
+static void vlan_vid0_add(struct net_device *dev)
+{
+ struct vlan_info *vlan_info;
+ int err;
+
+ if (!(dev->features & NETIF_F_HW_VLAN_CTAG_FILTER))
+ return;
+
+ pr_info("adding VLAN 0 to HW filter on device %s\n", dev->name);
+
+ err = vlan_vid_add(dev, htons(ETH_P_8021Q), 0);
+ if (err)
+ return;
+
+ vlan_info = rtnl_dereference(dev->vlan_info);
+ vlan_info->auto_vid0 = true;
+}
+
+static void vlan_vid0_del(struct net_device *dev)
+{
+ struct vlan_info *vlan_info = rtnl_dereference(dev->vlan_info);
+
+ if (!vlan_info || !vlan_info->auto_vid0)
+ return;
+
+ vlan_info->auto_vid0 = false;
+ vlan_vid_del(dev, htons(ETH_P_8021Q), 0);
+}
+
static int vlan_device_event(struct notifier_block *unused, unsigned long event,
void *ptr)
{
@@ -378,15 +407,10 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
return notifier_from_errno(err);
}
- if ((event == NETDEV_UP) &&
- (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)) {
- pr_info("adding VLAN 0 to HW filter on device %s\n",
- dev->name);
- vlan_vid_add(dev, htons(ETH_P_8021Q), 0);
- }
- if (event == NETDEV_DOWN &&
- (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER))
- vlan_vid_del(dev, htons(ETH_P_8021Q), 0);
+ if (event == NETDEV_UP)
+ vlan_vid0_add(dev);
+ else if (event == NETDEV_DOWN)
+ vlan_vid0_del(dev);
vlan_info = rtnl_dereference(dev->vlan_info);
if (!vlan_info)
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index 5eaf38875554..c7ffe591d593 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -33,6 +33,7 @@ struct vlan_info {
struct vlan_group grp;
struct list_head vid_list;
unsigned int nr_vids;
+ bool auto_vid0;
struct rcu_head rcu;
};
Powered by blists - more mailing lists