lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Fri, 22 Oct 2021 11:33:06 +0100
From:   Valentin Schneider <valentin.schneider@....com>
To:     linux-kernel@...r.kernel.org, linux-rt-users@...r.kernel.org,
        linux-arm-kernel@...ts.infradead.org
Cc:     Will Deacon <will@...nel.org>, Mark Rutland <mark.rutland@....com>,
        Marc Zyngier <maz@...nel.org>,
        Thomas Gleixner <tglx@...utronix.de>,
        Sebastian Andrzej Siewior <bigeasy@...utronix.de>,
        Ard Biesheuvel <ardb@...nel.org>
Subject: [PATCH 2/3] irqchip/gic-v3-its: Postpone LPI pending table freeing and memreserve

Memory used by the LPI tables have to be made persistent for kexec to have
a chance to work, as explained in [1]. If they have been made persistent
and we are booting into a kexec'd kernel, we also need to free the pages
that were preemptively allocated by the new kernel for those tables.

Both of those operations currently happen during its_cpu_init(), which
happens in a _STARTING (IOW atomic) cpuhp callback for secondary
CPUs. efi_mem_reserve_iomem() issues a GFP_ATOMIC allocation, which
unfortunately doesn't work under PREEMPT_RT (this ends up grabbing a
non-raw spinlock, which can sleep under PREEMPT_RT). Similarly, freeing the
pages ends up grabbing a sleepable spinlock.

Since the memreserve is only required by kexec, it doesn't have to be
done so early in the secondary boot process. Issue the reservation in a new
CPUHP_AP_ONLINE_DYN cpuhp callback, and piggy-back the page freeing on top
of it. As kexec issues a machine_shutdown() prior to machine_kexec(), it
will be serialized vs a CPU being plugged to life by the hotplug machinery -
either the CPU will have been brought up and have had its redistributor's
pending table memreserved, or it never went online and will have its table
allocated by the new kernel.

[1]: https://lore.kernel.org/lkml/20180921195954.21574-1-marc.zyngier@arm.com/
Signed-off-by: Valentin Schneider <valentin.schneider@....com>
---
 drivers/irqchip/irq-gic-v3-its.c | 65 ++++++++++++++++++++++++++++++--
 1 file changed, 61 insertions(+), 4 deletions(-)

diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index a688ed5c21e8..a6a4af59205e 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -47,6 +47,8 @@
 #define RDISTS_FLAGS_RD_TABLES_PREALLOCATED	(1 << 1)
 
 #define RDIST_FLAGS_LPI_ENABLED                 BIT(0)
+#define RDIST_FLAGS_PENDTABLE_RESERVED          BIT(1)
+#define RDIST_FLAGS_PENDTABLE_PREALLOCATED      BIT(2)
 
 static u32 lpi_id_bits;
 
@@ -3065,15 +3067,15 @@ static void its_cpu_init_lpis(void)
 		paddr &= GENMASK_ULL(51, 16);
 
 		WARN_ON(!gic_check_reserved_range(paddr, LPI_PENDBASE_SZ));
-		its_free_pending_table(gic_data_rdist()->pend_page);
-		gic_data_rdist()->pend_page = NULL;
+		gic_data_rdist()->flags |=
+			RDIST_FLAGS_PENDTABLE_RESERVED |
+			RDIST_FLAGS_PENDTABLE_PREALLOCATED;
 
 		goto out;
 	}
 
 	pend_page = gic_data_rdist()->pend_page;
 	paddr = page_to_phys(pend_page);
-	WARN_ON(gic_reserve_range(paddr, LPI_PENDBASE_SZ));
 
 	/* set PROPBASE */
 	val = (gic_rdists->prop_table_pa |
@@ -3163,7 +3165,8 @@ static void its_cpu_init_lpis(void)
 	gic_data_rdist()->flags |= RDIST_FLAGS_LPI_ENABLED;
 	pr_info("GICv3: CPU%d: using %s LPI pending table @%pa\n",
 		smp_processor_id(),
-		gic_data_rdist()->pend_page ? "allocated" : "reserved",
+		gic_data_rdist()->flags & RDIST_FLAGS_PENDTABLE_PREALLOCATED ?
+		"reserved" : "allocated",
 		&paddr);
 }
 
@@ -5202,6 +5205,39 @@ int its_cpu_init(void)
 	return 0;
 }
 
+#ifdef CONFIG_EFI
+static int its_cpu_memreserve_lpi(unsigned int cpu)
+{
+	struct page *pend_page = gic_data_rdist()->pend_page;
+	phys_addr_t paddr;
+
+	/*
+	 * If the pending table was pre-programmed, free the memory we
+	 * preemptively allocated.
+	 */
+	if (pend_page &&
+	    (gic_data_rdist()->flags & RDIST_FLAGS_PENDTABLE_PREALLOCATED)) {
+		its_free_pending_table(gic_data_rdist()->pend_page);
+		gic_data_rdist()->pend_page = NULL;
+	}
+
+	/*
+	 * Did we already issue a memreserve? This could be via a previous
+	 * invocation of this callback, or in a previous life before kexec.
+	 */
+	if (gic_data_rdist()->flags & RDIST_FLAGS_PENDTABLE_RESERVED)
+		return 0;
+
+	gic_data_rdist()->flags |= RDIST_FLAGS_PENDTABLE_RESERVED;
+
+	pend_page = gic_data_rdist()->pend_page;
+	paddr = page_to_phys(pend_page);
+	WARN_ON(gic_reserve_range(paddr, LPI_PENDBASE_SZ));
+
+	return 0;
+}
+#endif
+
 static const struct of_device_id its_device_id[] = {
 	{	.compatible	= "arm,gic-v3-its",	},
 	{},
@@ -5385,6 +5421,23 @@ static void __init its_acpi_probe(void)
 static void __init its_acpi_probe(void) { }
 #endif
 
+static int __init its_lpi_memreserve_init(void)
+{
+	int state;
+
+	if (!efi_enabled(EFI_CONFIG_TABLES))
+		return 0;
+
+	state = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
+				"irqchip/arm/gicv3/memreserve:online",
+				its_cpu_memreserve_lpi,
+				NULL);
+	if (state < 0)
+		return state;
+
+	return 0;
+}
+
 int __init its_init(struct fwnode_handle *handle, struct rdists *rdists,
 		    struct irq_domain *parent_domain)
 {
@@ -5412,6 +5465,10 @@ int __init its_init(struct fwnode_handle *handle, struct rdists *rdists,
 	if (err)
 		return err;
 
+	err = its_lpi_memreserve_init();
+	if (err)
+		return err;
+
 	list_for_each_entry(its, &its_nodes, entry) {
 		has_v4 |= is_v4(its);
 		has_v4_1 |= is_v4_1(its);
-- 
2.25.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ