lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [day] [month] [year] [list]
Message-ID: <20250828144400.1500374-1-den@valinux.co.jp>
Date: Thu, 28 Aug 2025 23:44:00 +0900
From: Koichiro Den <den@...inux.co.jp>
To: linux-arm-kernel@...ts.infradead.org
Cc: maz@...nel.org,
	tglx@...utronix.de,
	linux-kernel@...r.kernel.org
Subject: [PATCH v2] irqchip/gic-v3-its: Fix invalid wait context lockdep report

its_irq_set_vcpu_affinity() always runs under a raw_spin_lock wait
context, so calling kcalloc() there is not permitted and is RT-unsafe,
since ___slab_alloc() may acquire a local lock. Below is the actual
lockdep report observed:

  =============================
  [ BUG: Invalid wait context ]
  6.16.0-rc3-irqchip-next-7e28bba92c5c+ #1 Tainted: G S
  -----------------------------
  qemu-system-aar/2129 is trying to lock:
  ffff0085b74f2178 (batched_entropy_u32.lock){..-.}-{3:3}, at: get_random_u32+0x9c/0x708
  other info that might help us debug this:
  context-{5:5}
  6 locks held by qemu-system-aar/2129:
   #0: ffff0000b84a0738 (&vdev->igate){+.+.}-{4:4}, at: vfio_pci_core_ioctl+0x40c/0x748 [vfio_pci_core]
   #1: ffff8000883cef68 (lock#6){+.+.}-{4:4}, at: irq_bypass_register_producer+0x64/0x2f0
   #2: ffff0000ac0df960 (&its->its_lock){+.+.}-{4:4}, at: kvm_vgic_v4_set_forwarding+0x224/0x6f0
   #3: ffff000086dc4718 (&irq->irq_lock#3){....}-{2:2}, at: kvm_vgic_v4_set_forwarding+0x288/0x6f0
   #4: ffff0001356200c8 (&irq_desc_lock_class){-.-.}-{2:2}, at: __irq_get_desc_lock+0xc8/0x158
   #5: ffff00009eae4850 (&dev->event_map.vlpi_lock){....}-{2:2}, at: its_irq_set_vcpu_affinity+0x8c/0x528
  ...
  Call trace:
   show_stack+0x30/0x98 (C)
   dump_stack_lvl+0x9c/0xd0
   dump_stack+0x1c/0x34
   __lock_acquire+0x814/0xb40
   lock_acquire.part.0+0x16c/0x2a8
   lock_acquire+0x8c/0x178
   get_random_u32+0xd4/0x708
   __get_random_u32_below+0x20/0x80
   shuffle_freelist+0x5c/0x1b0
   allocate_slab+0x15c/0x348
   new_slab+0x48/0x80
   ___slab_alloc+0x590/0x8b8
   __slab_alloc.isra.0+0x3c/0x80
   __kmalloc_noprof+0x174/0x520
   its_vlpi_map+0x834/0xce0
   its_irq_set_vcpu_affinity+0x21c/0x528
   irq_set_vcpu_affinity+0x160/0x1b0
   its_map_vlpi+0x90/0x100
   kvm_vgic_v4_set_forwarding+0x3c4/0x6f0
   kvm_arch_irq_bypass_add_producer+0xac/0x108
   __connect+0x138/0x1b0
   irq_bypass_register_producer+0x16c/0x2f0
   vfio_msi_set_vector_signal+0x2c0/0x5a8 [vfio_pci_core]
   vfio_msi_set_block+0x8c/0x120 [vfio_pci_core]
   vfio_pci_set_msi_trigger+0x120/0x3d8 [vfio_pci_core]
  ...

To avoid this, simply pre-allocate vlpi_maps when creating an ITS v4
device with LPI allocation. The trade-off is some wasted memory,
depending on nr_lpis, if none of those LPIs is ever upgraded to a VLPI.

An alternative would be to move the vlpi_maps allocation out of
its_map_vlpi() and introduce a two-stage prepare/commit flow, allowing a
caller (KVM in the lockdep splat shown above) to do the allocation
outside irq_set_vcpu_affinity(). However, this would unnecessarily add
complexity.

Signed-off-by: Koichiro Den <den@...inux.co.jp>
---
Changes in v2:
- Fix missing kfree(vlpi_maps) on error path
- Fix missing vlpi_maps NULL initialization
- Add prealloc_vlpi_maps to simplify the if condition
- Fix typo in commit message (s/allcation/allocation/)

v1: https://lore.kernel.org/all/20250827073848.1410315-1-den@valinux.co.jp/
---
 drivers/irqchip/irq-gic-v3-its.c | 33 +++++++++++++++++---------------
 1 file changed, 18 insertions(+), 15 deletions(-)

diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 467cb78435a9..d4dfb738c226 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -1923,19 +1923,10 @@ static int its_vlpi_map(struct irq_data *d, struct its_cmd_info *info)
 	if (!info->map)
 		return -EINVAL;
 
-	if (!its_dev->event_map.vm) {
-		struct its_vlpi_map *maps;
-
-		maps = kcalloc(its_dev->event_map.nr_lpis, sizeof(*maps),
-			       GFP_ATOMIC);
-		if (!maps)
-			return -ENOMEM;
-
+	if (!its_dev->event_map.vm)
 		its_dev->event_map.vm = info->map->vm;
-		its_dev->event_map.vlpi_maps = maps;
-	} else if (its_dev->event_map.vm != info->map->vm) {
+	else if (its_dev->event_map.vm != info->map->vm)
 		return -EINVAL;
-	}
 
 	/* Get our private copy of the mapping information */
 	its_dev->event_map.vlpi_maps[event] = *info->map;
@@ -2010,10 +2001,8 @@ static int its_vlpi_unmap(struct irq_data *d)
 	 * Drop the refcount and make the device available again if
 	 * this was the last VLPI.
 	 */
-	if (!--its_dev->event_map.nr_vlpis) {
+	if (!--its_dev->event_map.nr_vlpis)
 		its_dev->event_map.vm = NULL;
-		kfree(its_dev->event_map.vlpi_maps);
-	}
 
 	return 0;
 }
@@ -3469,6 +3458,8 @@ static struct its_device *its_create_device(struct its_node *its, u32 dev_id,
 {
 	struct its_device *dev;
 	unsigned long *lpi_map = NULL;
+	struct its_vlpi_map *vlpi_maps = NULL;
+	bool prealloc_vlpi_maps = alloc_lpis && is_v4(its);
 	unsigned long flags;
 	u16 *col_map = NULL;
 	void *itt;
@@ -3506,11 +3497,21 @@ static struct its_device *its_create_device(struct its_node *its, u32 dev_id,
 		lpi_base = 0;
 	}
 
-	if (!dev || !itt || !col_map || (!lpi_map && alloc_lpis)) {
+	/*
+	 * Pre-allocate vlpi_maps to avoid slab allocation under the strict
+	 * raw spinlock wait context of irq_set_vcpu_affinity. This could
+	 * waste memory if no vlpi map is ever created.
+	 */
+	if (prealloc_vlpi_maps)
+		vlpi_maps = kcalloc(nr_lpis, sizeof(*vlpi_maps), GFP_KERNEL);
+
+	if (!dev || !itt || !col_map || (!lpi_map && alloc_lpis) ||
+	    (!vlpi_maps && prealloc_vlpi_maps)) {
 		kfree(dev);
 		itt_free_pool(itt, sz);
 		bitmap_free(lpi_map);
 		kfree(col_map);
+		kfree(vlpi_maps);
 		return NULL;
 	}
 
@@ -3524,6 +3525,7 @@ static struct its_device *its_create_device(struct its_node *its, u32 dev_id,
 	dev->event_map.col_map = col_map;
 	dev->event_map.lpi_base = lpi_base;
 	dev->event_map.nr_lpis = nr_lpis;
+	dev->event_map.vlpi_maps = vlpi_maps;
 	raw_spin_lock_init(&dev->event_map.vlpi_lock);
 	dev->device_id = dev_id;
 	INIT_LIST_HEAD(&dev->entry);
@@ -3546,6 +3548,7 @@ static void its_free_device(struct its_device *its_dev)
 	list_del(&its_dev->entry);
 	raw_spin_unlock_irqrestore(&its_dev->its->lock, flags);
 	kfree(its_dev->event_map.col_map);
+	kfree(its_dev->event_map.vlpi_maps);
 	itt_free_pool(its_dev->itt, its_dev->itt_sz);
 	kfree(its_dev);
 }
-- 
2.48.1


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ