lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Mon, 21 Dec 2020 20:36:42 +0100
From:   Antoine Tenart <atenart@...nel.org>
To:     davem@...emloft.net, kuba@...nel.org, alexander.duyck@...il.com
Cc:     Antoine Tenart <atenart@...nel.org>, netdev@...r.kernel.org,
        pabeni@...hat.com
Subject: [PATCH net v2 1/3] net: fix race conditions in xps by locking the maps and dev->tc_num

Two race conditions can be triggered in xps, resulting in various oops
and invalid memory accesses:

1. Calling netdev_set_num_tc while netif_set_xps_queue:

   - netdev_set_num_tc sets dev->tc_num.

   - netif_set_xps_queue uses dev->tc_num as one of the parameters to
     compute the size of new_dev_maps when allocating it. dev->tc_num is
     also used to access the map, and the compiler may generate code to
     retrieve this field multiple times in the function.

   If new_dev_maps is allocated using dev->tc_num and then dev->tc_num
   is set to a higher value through netdev_set_num_tc, later accesses to
   new_dev_maps in netif_set_xps_queue could lead to accessing memory
   outside of new_dev_maps; triggering an oops.

   One way of triggering this is to set an iface up (for which the
   driver uses netdev_set_num_tc in the open path, such as bnx2x) and
   writing to xps_cpus or xps_rxqs in a concurrent thread. With the
   right timing an oops is triggered.

2. Calling netif_set_xps_queue while netdev_set_num_tc is running:

   2.1. netdev_set_num_tc starts by resetting the xps queues,
        dev->tc_num isn't updated yet.

   2.2. netif_set_xps_queue is called, setting up the maps with the
        *old* dev->num_tc.

   2.3. dev->tc_num is updated.

   2.3. Later accesses to the map leads to out of bound accesses and
        oops.

   A similar issue can be found with netdev_reset_tc.

   The fix can't be to only link the size of the maps to them, as
   invalid configuration could still occur. The reset then set logic in
   both netdev_set_num_tc and netdev_reset_tc must be protected by a
   lock.

Both issues have the same fix: netif_set_xps_queue, netdev_set_num_tc
and netdev_reset_tc should be mutually exclusive.

This patch fixes those races by:

- Reworking netif_set_xps_queue by moving the xps_map_mutex up so the
  access of dev->num_tc is done under the lock.

- Using xps_map_mutex in both netdev_set_num_tc and netdev_reset_tc for
  the reset and set logic:

  + As xps_map_mutex was taken in the reset path, netif_reset_xps_queues
    had to be reworked to offer an unlocked version (as well as
    netdev_unbind_all_sb_channels which calls it).

  + cpus_read_lock was taken in the reset path as well, and is always
    taken before xps_map_mutex. It had to be moved out of the unlocked
    version as well.

  This is why the patch is a little bit longer, and moves
  netdev_unbind_sb_channel up in the file.

Fixes: 184c449f91fe ("net: Add support for XPS with QoS via traffic classes")
Signed-off-by: Antoine Tenart <atenart@...nel.org>
---
 net/core/dev.c | 122 ++++++++++++++++++++++++++++++++-----------------
 1 file changed, 81 insertions(+), 41 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index 8fa739259041..effdb7fee9df 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2527,8 +2527,8 @@ static void clean_xps_maps(struct net_device *dev, const unsigned long *mask,
 	}
 }
 
-static void netif_reset_xps_queues(struct net_device *dev, u16 offset,
-				   u16 count)
+static void __netif_reset_xps_queues(struct net_device *dev, u16 offset,
+				     u16 count)
 {
 	const unsigned long *possible_mask = NULL;
 	struct xps_dev_maps *dev_maps;
@@ -2537,9 +2537,6 @@ static void netif_reset_xps_queues(struct net_device *dev, u16 offset,
 	if (!static_key_false(&xps_needed))
 		return;
 
-	cpus_read_lock();
-	mutex_lock(&xps_map_mutex);
-
 	if (static_key_false(&xps_rxqs_needed)) {
 		dev_maps = xmap_dereference(dev->xps_rxqs_map);
 		if (dev_maps) {
@@ -2551,15 +2548,23 @@ static void netif_reset_xps_queues(struct net_device *dev, u16 offset,
 
 	dev_maps = xmap_dereference(dev->xps_cpus_map);
 	if (!dev_maps)
-		goto out_no_maps;
+		return;
 
 	if (num_possible_cpus() > 1)
 		possible_mask = cpumask_bits(cpu_possible_mask);
 	nr_ids = nr_cpu_ids;
 	clean_xps_maps(dev, possible_mask, dev_maps, nr_ids, offset, count,
 		       false);
+}
+
+static void netif_reset_xps_queues(struct net_device *dev, u16 offset,
+				   u16 count)
+{
+	cpus_read_lock();
+	mutex_lock(&xps_map_mutex);
+
+	__netif_reset_xps_queues(dev, offset, count);
 
-out_no_maps:
 	mutex_unlock(&xps_map_mutex);
 	cpus_read_unlock();
 }
@@ -2615,27 +2620,32 @@ int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
 {
 	const unsigned long *online_mask = NULL, *possible_mask = NULL;
 	struct xps_dev_maps *dev_maps, *new_dev_maps = NULL;
-	int i, j, tci, numa_node_id = -2;
+	int i, j, tci, numa_node_id = -2, ret = 0;
 	int maps_sz, num_tc = 1, tc = 0;
 	struct xps_map *map, *new_map;
 	bool active = false;
 	unsigned int nr_ids;
 
+	mutex_lock(&xps_map_mutex);
+
 	if (dev->num_tc) {
 		/* Do not allow XPS on subordinate device directly */
 		num_tc = dev->num_tc;
-		if (num_tc < 0)
-			return -EINVAL;
+		if (num_tc < 0) {
+			ret = -EINVAL;
+			goto unlock;
+		}
 
 		/* If queue belongs to subordinate dev use its map */
 		dev = netdev_get_tx_queue(dev, index)->sb_dev ? : dev;
 
 		tc = netdev_txq_to_tc(dev, index);
-		if (tc < 0)
-			return -EINVAL;
+		if (tc < 0) {
+			ret = -EINVAL;
+			goto unlock;
+		}
 	}
 
-	mutex_lock(&xps_map_mutex);
 	if (is_rxqs_map) {
 		maps_sz = XPS_RXQ_DEV_MAPS_SIZE(num_tc, dev->num_rx_queues);
 		dev_maps = xmap_dereference(dev->xps_rxqs_map);
@@ -2659,8 +2669,8 @@ int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
 		if (!new_dev_maps)
 			new_dev_maps = kzalloc(maps_sz, GFP_KERNEL);
 		if (!new_dev_maps) {
-			mutex_unlock(&xps_map_mutex);
-			return -ENOMEM;
+			ret = -ENOMEM;
+			goto unlock;
 		}
 
 		tci = j * num_tc + tc;
@@ -2765,7 +2775,7 @@ int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
 	}
 
 	if (!dev_maps)
-		goto out_no_maps;
+		goto unlock;
 
 	/* removes tx-queue from unused CPUs/rx-queues */
 	for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids),
@@ -2783,10 +2793,10 @@ int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
 	if (!active)
 		reset_xps_maps(dev, dev_maps, is_rxqs_map);
 
-out_no_maps:
+unlock:
 	mutex_unlock(&xps_map_mutex);
 
-	return 0;
+	return ret;
 error:
 	/* remove any maps that we added */
 	for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids),
@@ -2822,28 +2832,68 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
 EXPORT_SYMBOL(netif_set_xps_queue);
 
 #endif
-static void netdev_unbind_all_sb_channels(struct net_device *dev)
+
+static void __netdev_unbind_sb_channel(struct net_device *dev,
+				       struct net_device *sb_dev)
+{
+	struct netdev_queue *txq = &dev->_tx[dev->num_tx_queues];
+
+#ifdef CONFIG_XPS
+	__netif_reset_xps_queues(sb_dev, 0, dev->num_tx_queues);
+#endif
+
+	memset(sb_dev->tc_to_txq, 0, sizeof(sb_dev->tc_to_txq));
+	memset(sb_dev->prio_tc_map, 0, sizeof(sb_dev->prio_tc_map));
+
+	while (txq-- != &dev->_tx[0]) {
+		if (txq->sb_dev == sb_dev)
+			txq->sb_dev = NULL;
+	}
+}
+
+void netdev_unbind_sb_channel(struct net_device *dev,
+			      struct net_device *sb_dev)
+{
+	cpus_read_lock();
+	mutex_lock(&xps_map_mutex);
+
+	__netdev_unbind_sb_channel(dev, sb_dev);
+
+	mutex_unlock(&xps_map_mutex);
+	cpus_read_unlock();
+}
+EXPORT_SYMBOL(netdev_unbind_sb_channel);
+
+static void __netdev_unbind_all_sb_channels(struct net_device *dev)
 {
 	struct netdev_queue *txq = &dev->_tx[dev->num_tx_queues];
 
 	/* Unbind any subordinate channels */
 	while (txq-- != &dev->_tx[0]) {
 		if (txq->sb_dev)
-			netdev_unbind_sb_channel(dev, txq->sb_dev);
+			__netdev_unbind_sb_channel(dev, txq->sb_dev);
 	}
 }
 
 void netdev_reset_tc(struct net_device *dev)
 {
 #ifdef CONFIG_XPS
-	netif_reset_xps_queues_gt(dev, 0);
+	cpus_read_lock();
+	mutex_lock(&xps_map_mutex);
+
+	__netif_reset_xps_queues(dev, 0, dev->num_tx_queues);
 #endif
-	netdev_unbind_all_sb_channels(dev);
+	__netdev_unbind_all_sb_channels(dev);
 
 	/* Reset TC configuration of device */
 	dev->num_tc = 0;
 	memset(dev->tc_to_txq, 0, sizeof(dev->tc_to_txq));
 	memset(dev->prio_tc_map, 0, sizeof(dev->prio_tc_map));
+
+#ifdef CONFIG_XPS
+	mutex_unlock(&xps_map_mutex);
+	cpus_read_unlock();
+#endif
 }
 EXPORT_SYMBOL(netdev_reset_tc);
 
@@ -2867,32 +2917,22 @@ int netdev_set_num_tc(struct net_device *dev, u8 num_tc)
 		return -EINVAL;
 
 #ifdef CONFIG_XPS
-	netif_reset_xps_queues_gt(dev, 0);
+	cpus_read_lock();
+	mutex_lock(&xps_map_mutex);
+
+	__netif_reset_xps_queues(dev, 0, dev->num_tx_queues);
 #endif
-	netdev_unbind_all_sb_channels(dev);
+	__netdev_unbind_all_sb_channels(dev);
 
 	dev->num_tc = num_tc;
-	return 0;
-}
-EXPORT_SYMBOL(netdev_set_num_tc);
-
-void netdev_unbind_sb_channel(struct net_device *dev,
-			      struct net_device *sb_dev)
-{
-	struct netdev_queue *txq = &dev->_tx[dev->num_tx_queues];
 
 #ifdef CONFIG_XPS
-	netif_reset_xps_queues_gt(sb_dev, 0);
+	mutex_unlock(&xps_map_mutex);
+	cpus_read_unlock();
 #endif
-	memset(sb_dev->tc_to_txq, 0, sizeof(sb_dev->tc_to_txq));
-	memset(sb_dev->prio_tc_map, 0, sizeof(sb_dev->prio_tc_map));
-
-	while (txq-- != &dev->_tx[0]) {
-		if (txq->sb_dev == sb_dev)
-			txq->sb_dev = NULL;
-	}
+	return 0;
 }
-EXPORT_SYMBOL(netdev_unbind_sb_channel);
+EXPORT_SYMBOL(netdev_set_num_tc);
 
 int netdev_bind_sb_channel_queue(struct net_device *dev,
 				 struct net_device *sb_dev,
-- 
2.29.2

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ