[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <d416a14ec38c7ba463341b83a7a9ec6ccc435246.1734419614.git.christophe.leroy@csgroup.eu>
Date: Tue, 17 Dec 2024 08:18:25 +0100
From: Christophe Leroy <christophe.leroy@...roup.eu>
To: "David S. Miller" <davem@...emloft.net>,
Eric Dumazet <edumazet@...gle.com>,
Jakub Kicinski <kuba@...nel.org>,
Paolo Abeni <pabeni@...hat.com>,
Simon Horman <horms@...nel.org>,
"Eric W. Biederman" <ebiederm@...ssion.com>
Cc: Christophe Leroy <christophe.leroy@...roup.eu>,
linux-kernel@...r.kernel.org,
netdev@...r.kernel.org,
Maxime Chevallier <maxime.chevallier@...tlin.com>,
TRINH THAI Florent <florent.trinh-thai@...soprasteria.com>,
CASAUBON Jean Michel <jean-michel.casaubon@...soprasteria.com>
Subject: [PATCH net] net: sysfs: Fix deadlock situation in sysfs accesses
The following problem is encountered on kernel built with
CONFIG_PREEMPT. An snmp daemon running with normal priority is
regularly calling ioctl(SIOCGMIIPHY). Another process running with
SCHED_FIFO policy is regularly reading /sys/class/net/eth0/carrier.
After some random time, the snmp daemon gets preempted while holding
the RTNL mutex then the high priority process is busy looping into
carrier_show which bails out early due to a non-successfull
rtnl_trylock() which implies restart_syscall(). Because the snmp
daemon has a lower priority, it never gets the chances to release
the RTNL mutex and the high-priority task continues to loop forever.
Replace the trylock by lock_interruptible. This will increase the
priority of the task holding the lock so that it can release it and
allow the reader of /sys/class/net/eth0/carrier to actually perform
its read.
The problem can be reproduced with the following two simple apps:
The one below runs with normal SCHED_OTHER priority:
int main(int argc, char **argv)
{
int sk = socket(AF_INET, SOCK_DGRAM, 0);
char buf[32];
struct ifreq ifr = {.ifr_name = "eth0"};
for (;;)
ioctl(sk, SIOCGMIIPHY, &ifr);
exit(0);
}
And the following one is started with chrt -f 80 so it runs with
SCHED_FIFO policy:
int main(int argc, char **argv)
{
int fd = open("/sys/class/net/eth0/carrier", O_RDONLY);
char buf[32];
for (;;) {
read(fd, buf, sizeof(buf));
lseek(fd, 0, SEEK_SET);
usleep(5000);
}
exit(0);
}
When running alone, that high priority task takes approx 6% CPU time.
When running together with the first one above, the high priority task
reaches almost 100% of CPU time.
With this fix applied, the high priority task remains at 6% CPU time
while the other one takes the remaining CPU time available.
Fixes: 336ca57c3b4e ("net-sysfs: Use rtnl_trylock in sysfs methods.")
Signed-off-by: Christophe Leroy <christophe.leroy@...roup.eu>
---
include/linux/rtnetlink.h | 1 +
net/core/net-sysfs.c | 70 +++++++++++++++++++--------------------
net/core/rtnetlink.c | 6 ++++
3 files changed, 42 insertions(+), 35 deletions(-)
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 14b88f551920..f060eecbc382 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -43,6 +43,7 @@ extern void rtnl_lock(void);
extern void rtnl_unlock(void);
extern int rtnl_trylock(void);
extern int rtnl_is_locked(void);
+extern int rtnl_lock_interruptible(void);
extern int rtnl_lock_killable(void);
extern bool refcount_dec_and_rtnl_lock(refcount_t *r);
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 2d9afc6e2161..0f97ea78c4da 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -95,8 +95,8 @@ static ssize_t netdev_store(struct device *dev, struct device_attribute *attr,
if (ret)
goto err;
- if (!rtnl_trylock())
- return restart_syscall();
+ if (rtnl_lock_interruptible())
+ return -ERESTARTSYS;
if (dev_isalive(netdev)) {
ret = (*set)(netdev, new);
@@ -190,7 +190,7 @@ static ssize_t carrier_store(struct device *dev, struct device_attribute *attr,
struct net_device *netdev = to_net_dev(dev);
/* The check is also done in change_carrier; this helps returning early
- * without hitting the trylock/restart in netdev_store.
+ * without hitting the lock/restart in netdev_store.
*/
if (!netdev->netdev_ops->ndo_change_carrier)
return -EOPNOTSUPP;
@@ -204,8 +204,8 @@ static ssize_t carrier_show(struct device *dev,
struct net_device *netdev = to_net_dev(dev);
int ret = -EINVAL;
- if (!rtnl_trylock())
- return restart_syscall();
+ if (rtnl_lock_interruptible())
+ return -ERESTARTSYS;
if (netif_running(netdev)) {
/* Synchronize carrier state with link watch,
@@ -228,13 +228,13 @@ static ssize_t speed_show(struct device *dev,
int ret = -EINVAL;
/* The check is also done in __ethtool_get_link_ksettings; this helps
- * returning early without hitting the trylock/restart below.
+ * returning early without hitting the lock/restart below.
*/
if (!netdev->ethtool_ops->get_link_ksettings)
return ret;
- if (!rtnl_trylock())
- return restart_syscall();
+ if (rtnl_lock_interruptible())
+ return -ERESTARTSYS;
if (netif_running(netdev)) {
struct ethtool_link_ksettings cmd;
@@ -254,13 +254,13 @@ static ssize_t duplex_show(struct device *dev,
int ret = -EINVAL;
/* The check is also done in __ethtool_get_link_ksettings; this helps
- * returning early without hitting the trylock/restart below.
+ * returning early without hitting the lock/restart below.
*/
if (!netdev->ethtool_ops->get_link_ksettings)
return ret;
- if (!rtnl_trylock())
- return restart_syscall();
+ if (rtnl_lock_interruptible())
+ return -ERESTARTSYS;
if (netif_running(netdev)) {
struct ethtool_link_ksettings cmd;
@@ -459,8 +459,8 @@ static ssize_t ifalias_store(struct device *dev, struct device_attribute *attr,
if (len > 0 && buf[len - 1] == '\n')
--count;
- if (!rtnl_trylock())
- return restart_syscall();
+ if (rtnl_lock_interruptible())
+ return -ERESTARTSYS;
if (dev_isalive(netdev)) {
ret = dev_set_alias(netdev, buf, count);
@@ -523,13 +523,13 @@ static ssize_t phys_port_id_show(struct device *dev,
ssize_t ret = -EINVAL;
/* The check is also done in dev_get_phys_port_id; this helps returning
- * early without hitting the trylock/restart below.
+ * early without hitting the lock/restart below.
*/
if (!netdev->netdev_ops->ndo_get_phys_port_id)
return -EOPNOTSUPP;
- if (!rtnl_trylock())
- return restart_syscall();
+ if (rtnl_lock_interruptible())
+ return -ERESTARTSYS;
if (dev_isalive(netdev)) {
struct netdev_phys_item_id ppid;
@@ -551,14 +551,14 @@ static ssize_t phys_port_name_show(struct device *dev,
ssize_t ret = -EINVAL;
/* The checks are also done in dev_get_phys_port_name; this helps
- * returning early without hitting the trylock/restart below.
+ * returning early without hitting the lock/restart below.
*/
if (!netdev->netdev_ops->ndo_get_phys_port_name &&
!netdev->devlink_port)
return -EOPNOTSUPP;
- if (!rtnl_trylock())
- return restart_syscall();
+ if (rtnl_lock_interruptible())
+ return -ERESTARTSYS;
if (dev_isalive(netdev)) {
char name[IFNAMSIZ];
@@ -580,15 +580,15 @@ static ssize_t phys_switch_id_show(struct device *dev,
ssize_t ret = -EINVAL;
/* The checks are also done in dev_get_phys_port_name; this helps
- * returning early without hitting the trylock/restart below. This works
+ * returning early without hitting the lock/restart below. This works
* because recurse is false when calling dev_get_port_parent_id.
*/
if (!netdev->netdev_ops->ndo_get_port_parent_id &&
!netdev->devlink_port)
return -EOPNOTSUPP;
- if (!rtnl_trylock())
- return restart_syscall();
+ if (rtnl_lock_interruptible())
+ return -ERESTARTSYS;
if (dev_isalive(netdev)) {
struct netdev_phys_item_id ppid = { };
@@ -1282,8 +1282,8 @@ static ssize_t traffic_class_show(struct netdev_queue *queue,
if (!netif_is_multiqueue(dev))
return -ENOENT;
- if (!rtnl_trylock())
- return restart_syscall();
+ if (rtnl_lock_interruptible())
+ return -ERESTARTSYS;
index = get_netdev_queue_index(queue);
@@ -1327,7 +1327,7 @@ static ssize_t tx_maxrate_store(struct netdev_queue *queue,
return -EPERM;
/* The check is also done later; this helps returning early without
- * hitting the trylock/restart below.
+ * hitting the lock/restart below.
*/
if (!dev->netdev_ops->ndo_set_tx_maxrate)
return -EOPNOTSUPP;
@@ -1336,8 +1336,8 @@ static ssize_t tx_maxrate_store(struct netdev_queue *queue,
if (err < 0)
return err;
- if (!rtnl_trylock())
- return restart_syscall();
+ if (rtnl_lock_interruptible())
+ return -ERESTARTSYS;
err = -EOPNOTSUPP;
if (dev->netdev_ops->ndo_set_tx_maxrate)
@@ -1593,8 +1593,8 @@ static ssize_t xps_cpus_show(struct netdev_queue *queue, char *buf)
index = get_netdev_queue_index(queue);
- if (!rtnl_trylock())
- return restart_syscall();
+ if (rtnl_lock_interruptible())
+ return -ERESTARTSYS;
/* If queue belongs to subordinate dev use its map */
dev = netdev_get_tx_queue(dev, index)->sb_dev ? : dev;
@@ -1640,9 +1640,9 @@ static ssize_t xps_cpus_store(struct netdev_queue *queue,
return err;
}
- if (!rtnl_trylock()) {
+ if (rtnl_lock_interruptible()) {
free_cpumask_var(mask);
- return restart_syscall();
+ return -ERESTARTSYS;
}
err = netif_set_xps_queue(dev, mask, index);
@@ -1664,8 +1664,8 @@ static ssize_t xps_rxqs_show(struct netdev_queue *queue, char *buf)
index = get_netdev_queue_index(queue);
- if (!rtnl_trylock())
- return restart_syscall();
+ if (rtnl_lock_interruptible())
+ return -ERESTARTSYS;
tc = netdev_txq_to_tc(dev, index);
rtnl_unlock();
@@ -1699,9 +1699,9 @@ static ssize_t xps_rxqs_store(struct netdev_queue *queue, const char *buf,
return err;
}
- if (!rtnl_trylock()) {
+ if (rtnl_lock_interruptible()) {
bitmap_free(mask);
- return restart_syscall();
+ return -ERESTARTSYS;
}
cpus_read_lock();
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index ebcfc2debf1a..a52ffc3c8ab8 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -80,6 +80,12 @@ void rtnl_lock(void)
}
EXPORT_SYMBOL(rtnl_lock);
+int rtnl_lock_interruptible(void)
+{
+ return mutex_lock_interruptible(&rtnl_mutex);
+}
+EXPORT_SYMBOL(rtnl_lock_interruptible);
+
int rtnl_lock_killable(void)
{
return mutex_lock_killable(&rtnl_mutex);
--
2.47.0
Powered by blists - more mailing lists