[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20201221193644.1296933-2-atenart@kernel.org>
Date: Mon, 21 Dec 2020 20:36:42 +0100
From: Antoine Tenart <atenart@...nel.org>
To: davem@...emloft.net, kuba@...nel.org, alexander.duyck@...il.com
Cc: Antoine Tenart <atenart@...nel.org>, netdev@...r.kernel.org,
pabeni@...hat.com
Subject: [PATCH net v2 1/3] net: fix race conditions in xps by locking the maps and dev->num_tc
Two race conditions can be triggered in xps, resulting in various oops
and invalid memory accesses:
1. Calling netdev_set_num_tc while netif_set_xps_queue is running:
- netdev_set_num_tc sets dev->num_tc.
- netif_set_xps_queue uses dev->num_tc as one of the parameters to
compute the size of new_dev_maps when allocating it. dev->num_tc is
also used to access the map, and the compiler may generate code to
retrieve this field multiple times in the function.
If new_dev_maps is allocated using dev->num_tc and then dev->num_tc
is set to a higher value through netdev_set_num_tc, later accesses to
new_dev_maps in netif_set_xps_queue could lead to accessing memory
outside of new_dev_maps; triggering an oops.
One way of triggering this is to set an iface up (for which the
driver uses netdev_set_num_tc in the open path, such as bnx2x) and
writing to xps_cpus or xps_rxqs in a concurrent thread. With the
right timing an oops is triggered.
2. Calling netif_set_xps_queue while netdev_set_num_tc is running:
2.1. netdev_set_num_tc starts by resetting the xps queues,
dev->num_tc isn't updated yet.
2.2. netif_set_xps_queue is called, setting up the maps with the
*old* dev->num_tc.
2.3. dev->num_tc is updated.
2.4. Later accesses to the map lead to out-of-bounds accesses and
oops.
A similar issue can be found with netdev_reset_tc.
The fix can't be to only link the size of the maps to them, as
invalid configuration could still occur. The reset then set logic in
both netdev_set_num_tc and netdev_reset_tc must be protected by a
lock.
Both issues have the same fix: netif_set_xps_queue, netdev_set_num_tc
and netdev_reset_tc should be mutually exclusive.
This patch fixes those races by:
- Reworking netif_set_xps_queue by moving the xps_map_mutex up so the
access of dev->num_tc is done under the lock.
- Using xps_map_mutex in both netdev_set_num_tc and netdev_reset_tc for
the reset and set logic:
+ As xps_map_mutex was taken in the reset path, netif_reset_xps_queues
had to be reworked to offer an unlocked version (as well as
netdev_unbind_all_sb_channels which calls it).
+ cpus_read_lock was taken in the reset path as well, and is always
taken before xps_map_mutex. It had to be moved out of the unlocked
version as well.
This is why the patch is a little bit longer, and moves
netdev_unbind_sb_channel up in the file.
Fixes: 184c449f91fe ("net: Add support for XPS with QoS via traffic classes")
Signed-off-by: Antoine Tenart <atenart@...nel.org>
---
net/core/dev.c | 122 ++++++++++++++++++++++++++++++++-----------------
1 file changed, 81 insertions(+), 41 deletions(-)
diff --git a/net/core/dev.c b/net/core/dev.c
index 8fa739259041..effdb7fee9df 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2527,8 +2527,8 @@ static void clean_xps_maps(struct net_device *dev, const unsigned long *mask,
}
}
-static void netif_reset_xps_queues(struct net_device *dev, u16 offset,
- u16 count)
+static void __netif_reset_xps_queues(struct net_device *dev, u16 offset,
+ u16 count)
{
const unsigned long *possible_mask = NULL;
struct xps_dev_maps *dev_maps;
@@ -2537,9 +2537,6 @@ static void netif_reset_xps_queues(struct net_device *dev, u16 offset,
if (!static_key_false(&xps_needed))
return;
- cpus_read_lock();
- mutex_lock(&xps_map_mutex);
-
if (static_key_false(&xps_rxqs_needed)) {
dev_maps = xmap_dereference(dev->xps_rxqs_map);
if (dev_maps) {
@@ -2551,15 +2548,23 @@ static void netif_reset_xps_queues(struct net_device *dev, u16 offset,
dev_maps = xmap_dereference(dev->xps_cpus_map);
if (!dev_maps)
- goto out_no_maps;
+ return;
if (num_possible_cpus() > 1)
possible_mask = cpumask_bits(cpu_possible_mask);
nr_ids = nr_cpu_ids;
clean_xps_maps(dev, possible_mask, dev_maps, nr_ids, offset, count,
false);
+}
+
+static void netif_reset_xps_queues(struct net_device *dev, u16 offset,
+ u16 count)
+{
+ cpus_read_lock();
+ mutex_lock(&xps_map_mutex);
+
+ __netif_reset_xps_queues(dev, offset, count);
-out_no_maps:
mutex_unlock(&xps_map_mutex);
cpus_read_unlock();
}
@@ -2615,27 +2620,32 @@ int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
{
const unsigned long *online_mask = NULL, *possible_mask = NULL;
struct xps_dev_maps *dev_maps, *new_dev_maps = NULL;
- int i, j, tci, numa_node_id = -2;
+ int i, j, tci, numa_node_id = -2, ret = 0;
int maps_sz, num_tc = 1, tc = 0;
struct xps_map *map, *new_map;
bool active = false;
unsigned int nr_ids;
+ mutex_lock(&xps_map_mutex);
+
if (dev->num_tc) {
/* Do not allow XPS on subordinate device directly */
num_tc = dev->num_tc;
- if (num_tc < 0)
- return -EINVAL;
+ if (num_tc < 0) {
+ ret = -EINVAL;
+ goto unlock;
+ }
/* If queue belongs to subordinate dev use its map */
dev = netdev_get_tx_queue(dev, index)->sb_dev ? : dev;
tc = netdev_txq_to_tc(dev, index);
- if (tc < 0)
- return -EINVAL;
+ if (tc < 0) {
+ ret = -EINVAL;
+ goto unlock;
+ }
}
- mutex_lock(&xps_map_mutex);
if (is_rxqs_map) {
maps_sz = XPS_RXQ_DEV_MAPS_SIZE(num_tc, dev->num_rx_queues);
dev_maps = xmap_dereference(dev->xps_rxqs_map);
@@ -2659,8 +2669,8 @@ int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
if (!new_dev_maps)
new_dev_maps = kzalloc(maps_sz, GFP_KERNEL);
if (!new_dev_maps) {
- mutex_unlock(&xps_map_mutex);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto unlock;
}
tci = j * num_tc + tc;
@@ -2765,7 +2775,7 @@ int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
}
if (!dev_maps)
- goto out_no_maps;
+ goto unlock;
/* removes tx-queue from unused CPUs/rx-queues */
for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids),
@@ -2783,10 +2793,10 @@ int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
if (!active)
reset_xps_maps(dev, dev_maps, is_rxqs_map);
-out_no_maps:
+unlock:
mutex_unlock(&xps_map_mutex);
- return 0;
+ return ret;
error:
/* remove any maps that we added */
for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids),
@@ -2822,28 +2832,68 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
EXPORT_SYMBOL(netif_set_xps_queue);
#endif
-static void netdev_unbind_all_sb_channels(struct net_device *dev)
+
+static void __netdev_unbind_sb_channel(struct net_device *dev,
+ struct net_device *sb_dev)
+{
+ struct netdev_queue *txq = &dev->_tx[dev->num_tx_queues];
+
+#ifdef CONFIG_XPS
+ __netif_reset_xps_queues(sb_dev, 0, dev->num_tx_queues);
+#endif
+
+ memset(sb_dev->tc_to_txq, 0, sizeof(sb_dev->tc_to_txq));
+ memset(sb_dev->prio_tc_map, 0, sizeof(sb_dev->prio_tc_map));
+
+ while (txq-- != &dev->_tx[0]) {
+ if (txq->sb_dev == sb_dev)
+ txq->sb_dev = NULL;
+ }
+}
+
+void netdev_unbind_sb_channel(struct net_device *dev,
+ struct net_device *sb_dev)
+{
+ cpus_read_lock();
+ mutex_lock(&xps_map_mutex);
+
+ __netdev_unbind_sb_channel(dev, sb_dev);
+
+ mutex_unlock(&xps_map_mutex);
+ cpus_read_unlock();
+}
+EXPORT_SYMBOL(netdev_unbind_sb_channel);
+
+static void __netdev_unbind_all_sb_channels(struct net_device *dev)
{
struct netdev_queue *txq = &dev->_tx[dev->num_tx_queues];
/* Unbind any subordinate channels */
while (txq-- != &dev->_tx[0]) {
if (txq->sb_dev)
- netdev_unbind_sb_channel(dev, txq->sb_dev);
+ __netdev_unbind_sb_channel(dev, txq->sb_dev);
}
}
void netdev_reset_tc(struct net_device *dev)
{
#ifdef CONFIG_XPS
- netif_reset_xps_queues_gt(dev, 0);
+ cpus_read_lock();
+ mutex_lock(&xps_map_mutex);
+
+ __netif_reset_xps_queues(dev, 0, dev->num_tx_queues);
#endif
- netdev_unbind_all_sb_channels(dev);
+ __netdev_unbind_all_sb_channels(dev);
/* Reset TC configuration of device */
dev->num_tc = 0;
memset(dev->tc_to_txq, 0, sizeof(dev->tc_to_txq));
memset(dev->prio_tc_map, 0, sizeof(dev->prio_tc_map));
+
+#ifdef CONFIG_XPS
+ mutex_unlock(&xps_map_mutex);
+ cpus_read_unlock();
+#endif
}
EXPORT_SYMBOL(netdev_reset_tc);
@@ -2867,32 +2917,22 @@ int netdev_set_num_tc(struct net_device *dev, u8 num_tc)
return -EINVAL;
#ifdef CONFIG_XPS
- netif_reset_xps_queues_gt(dev, 0);
+ cpus_read_lock();
+ mutex_lock(&xps_map_mutex);
+
+ __netif_reset_xps_queues(dev, 0, dev->num_tx_queues);
#endif
- netdev_unbind_all_sb_channels(dev);
+ __netdev_unbind_all_sb_channels(dev);
dev->num_tc = num_tc;
- return 0;
-}
-EXPORT_SYMBOL(netdev_set_num_tc);
-
-void netdev_unbind_sb_channel(struct net_device *dev,
- struct net_device *sb_dev)
-{
- struct netdev_queue *txq = &dev->_tx[dev->num_tx_queues];
#ifdef CONFIG_XPS
- netif_reset_xps_queues_gt(sb_dev, 0);
+ mutex_unlock(&xps_map_mutex);
+ cpus_read_unlock();
#endif
- memset(sb_dev->tc_to_txq, 0, sizeof(sb_dev->tc_to_txq));
- memset(sb_dev->prio_tc_map, 0, sizeof(sb_dev->prio_tc_map));
-
- while (txq-- != &dev->_tx[0]) {
- if (txq->sb_dev == sb_dev)
- txq->sb_dev = NULL;
- }
+ return 0;
}
-EXPORT_SYMBOL(netdev_unbind_sb_channel);
+EXPORT_SYMBOL(netdev_set_num_tc);
int netdev_bind_sb_channel_queue(struct net_device *dev,
struct net_device *sb_dev,
--
2.29.2
Powered by blists - more mailing lists