[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <537343f7-c580-43b0-9ad2-691701b9fb8e@I-love.SAKURA.ne.jp>
Date: Mon, 19 Jan 2026 20:19:44 +0900
From: Tetsuo Handa <penguin-kernel@...ove.SAKURA.ne.jp>
To: Aviad Yehezkel <aviadye@...lnaox.com>, Aviv Heller <avivh@...lanox.com>,
Boris Pismenny <borisp@...lanox.com>,
"David S. Miller"
<davem@...emloft.net>,
Florian Westphal <fw@...len.de>, Guy Shapiro <guysh@...lanox.com>,
Ilan Tayari <ilant@...lanox.com>,
Kristian Evensen <kristian.evensen@...il.com>,
Leon Romanovsky <leon@...nel.org>, Leon Romanovsky <leonro@...dia.com>,
Raed Salem <raeds@...lanox.com>, Raed Salem <raeds@...dia.com>,
Saeed Mahameed <saeedm@...lanox.com>,
Steffen Klassert <steffen.klassert@...unet.com>,
Yossi Kuperman <yossiku@...lanox.com>
Cc: Network Development <netdev@...r.kernel.org>
Subject: [PATCH] xfrm: force flush upon NETDEV_UNREGISTER event
syzbot is reporting that "struct xfrm_state" refcount is leaking.
unregister_netdevice: waiting for netdevsim0 to become free. Usage count = 2
ref_tracker: netdev@...f888052f24618 has 1/1 users at
__netdev_tracker_alloc include/linux/netdevice.h:4400 [inline]
netdev_tracker_alloc include/linux/netdevice.h:4412 [inline]
xfrm_dev_state_add+0x3a5/0x1080 net/xfrm/xfrm_device.c:316
xfrm_state_construct net/xfrm/xfrm_user.c:986 [inline]
xfrm_add_sa+0x34ff/0x5fa0 net/xfrm/xfrm_user.c:1022
xfrm_user_rcv_msg+0x58e/0xc00 net/xfrm/xfrm_user.c:3507
netlink_rcv_skb+0x158/0x420 net/netlink/af_netlink.c:2550
xfrm_netlink_rcv+0x71/0x90 net/xfrm/xfrm_user.c:3529
netlink_unicast_kernel net/netlink/af_netlink.c:1318 [inline]
netlink_unicast+0x5aa/0x870 net/netlink/af_netlink.c:1344
netlink_sendmsg+0x8c8/0xdd0 net/netlink/af_netlink.c:1894
sock_sendmsg_nosec net/socket.c:727 [inline]
__sock_sendmsg net/socket.c:742 [inline]
____sys_sendmsg+0xa5d/0xc30 net/socket.c:2592
___sys_sendmsg+0x134/0x1d0 net/socket.c:2646
__sys_sendmsg+0x16d/0x220 net/socket.c:2678
do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline]
do_syscall_64+0xcd/0xf80 arch/x86/entry/syscall_64.c:94
entry_SYSCALL_64_after_hwframe+0x77/0x7f
Currently, the NETDEV_UNREGISTER case in xfrm_dev_event() is a no-op
when (dev->features & NETIF_F_HW_ESP) == 0. Since xfrm_dev_state_add()
and xfrm_dev_policy_add() take a reference to "struct net_device", the
corresponding NETDEV_UNREGISTER handler must release that reference.
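Flush dev state and dev policy unconditionally when the NETDEV_UNREGISTER
event fires, skipping both the NETIF_F_HW_ESP feature check and the
security context (secctx) check that normally gate the flush.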
Reported-by: syzbot+881d65229ca4f9ae8c84@...kaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=881d65229ca4f9ae8c84
Fixes: d77e38e612a0 ("xfrm: Add an IPsec hardware offloading API")
Signed-off-by: Tetsuo Handa <penguin-kernel@...ove.SAKURA.ne.jp>
---
WARNING: This patch was written purely by analogy with a similar case in
the net/can/j1939 module. It is completely untested and might not solve
this problem, because no reproducer is available. I would appreciate it
if someone could write test code for this problem.
drivers/net/bonding/bond_main.c | 2 +-
include/net/xfrm.h | 5 ++---
net/xfrm/xfrm_device.c | 15 ++++++++++++---
net/xfrm/xfrm_policy.c | 4 ++--
net/xfrm/xfrm_state.c | 4 ++--
5 files changed, 19 insertions(+), 11 deletions(-)
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 3d56339a8a10..bbb6bc4b30cd 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -3824,7 +3824,7 @@ static int bond_master_netdev_event(unsigned long event,
case NETDEV_UNREGISTER:
bond_remove_proc_entry(event_bond);
#ifdef CONFIG_XFRM_OFFLOAD
- xfrm_dev_state_flush(dev_net(bond_dev), bond_dev, true);
+ xfrm_dev_state_flush(dev_net(bond_dev), bond_dev, true, false);
#endif /* CONFIG_XFRM_OFFLOAD */
break;
case NETDEV_REGISTER:
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 0a14daaa5dd4..b19e7b1fbda2 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1765,9 +1765,8 @@ struct xfrmk_spdinfo {
struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq, u32 pcpu_num);
int xfrm_state_delete(struct xfrm_state *x);
int xfrm_state_flush(struct net *net, u8 proto, bool task_valid);
-int xfrm_dev_state_flush(struct net *net, struct net_device *dev, bool task_valid);
-int xfrm_dev_policy_flush(struct net *net, struct net_device *dev,
- bool task_valid);
+int xfrm_dev_state_flush(struct net *net, struct net_device *dev, bool task_valid, bool force);
+int xfrm_dev_policy_flush(struct net *net, struct net_device *dev, bool task_valid, bool force);
void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si);
void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si);
u32 xfrm_replay_seqhi(struct xfrm_state *x, __be32 net_seq);
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index 52ae0e034d29..ec094aeb1604 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -537,13 +537,21 @@ static int xfrm_api_check(struct net_device *dev)
static int xfrm_dev_down(struct net_device *dev)
{
if (dev->features & NETIF_F_HW_ESP) {
- xfrm_dev_state_flush(dev_net(dev), dev, true);
- xfrm_dev_policy_flush(dev_net(dev), dev, true);
+ xfrm_dev_state_flush(dev_net(dev), dev, true, false);
+ xfrm_dev_policy_flush(dev_net(dev), dev, true, false);
}
return NOTIFY_DONE;
}
+static int xfrm_dev_unregister(struct net_device *dev)
+{
+ xfrm_dev_state_flush(dev_net(dev), dev, true, true);
+ xfrm_dev_policy_flush(dev_net(dev), dev, true, true);
+
+ return NOTIFY_DONE;
+}
+
static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
@@ -556,8 +564,9 @@ static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void
return xfrm_api_check(dev);
case NETDEV_DOWN:
- case NETDEV_UNREGISTER:
return xfrm_dev_down(dev);
+ case NETDEV_UNREGISTER:
+ return xfrm_dev_unregister(dev);
}
return NOTIFY_DONE;
}
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 62486f866975..a451dff25c52 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1855,14 +1855,14 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid)
EXPORT_SYMBOL(xfrm_policy_flush);
int xfrm_dev_policy_flush(struct net *net, struct net_device *dev,
- bool task_valid)
+ bool task_valid, bool forced)
{
int dir, err = 0, cnt = 0;
struct xfrm_policy *pol;
spin_lock_bh(&net->xfrm.xfrm_policy_lock);
- err = xfrm_dev_policy_flush_secctx_check(net, dev, task_valid);
+ err = forced ? 0 : xfrm_dev_policy_flush_secctx_check(net, dev, task_valid);
if (err)
goto out;
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 98b362d51836..29a124291331 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -958,7 +958,7 @@ int xfrm_state_flush(struct net *net, u8 proto, bool task_valid)
}
EXPORT_SYMBOL(xfrm_state_flush);
-int xfrm_dev_state_flush(struct net *net, struct net_device *dev, bool task_valid)
+int xfrm_dev_state_flush(struct net *net, struct net_device *dev, bool task_valid, bool forced)
{
struct xfrm_state *x;
struct hlist_node *tmp;
@@ -966,7 +966,7 @@ int xfrm_dev_state_flush(struct net *net, struct net_device *dev, bool task_vali
int i, err = 0, cnt = 0;
spin_lock_bh(&net->xfrm.xfrm_state_lock);
- err = xfrm_dev_state_flush_secctx_check(net, dev, task_valid);
+ err = forced ? 0 : xfrm_dev_state_flush_secctx_check(net, dev, task_valid);
if (err)
goto out;
--
2.47.3
Powered by blists - more mailing lists