lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <c142f447c59861f3c94b0fea7f055f4ff201fa98.camel@infradead.org>
Date: Mon, 23 Jun 2025 18:38:46 +0200
From: David Woodhouse <dwmw2@...radead.org>
To: Marc Zyngier <maz@...nel.org>, Oliver Upton <oliver.upton@...ux.dev>, 
 Joey Gouly <joey.gouly@....com>, Suzuki K Poulose <suzuki.poulose@....com>,
 Zenghui Yu <yuzenghui@...wei.com>, Catalin Marinas
 <catalin.marinas@....com>, Will Deacon <will@...nel.org>, Paolo Bonzini
 <pbonzini@...hat.com>, Sebastian Ott <sebott@...hat.com>, Andre Przywara
 <andre.przywara@....com>, Thorsten Blum <thorsten.blum@...ux.dev>, Shameer
 Kolothum <shameerali.kolothum.thodi@...wei.com>,
 linux-arm-kernel@...ts.infradead.org,  kvmarm@...ts.linux.dev,
 linux-kernel@...r.kernel.org, kvm@...r.kernel.org
Subject: Re: [RFC PATCH 2/2] KVM: arm64: vgic-its: Unmap all vPEs on shutdown

On Mon, 2025-06-23 at 14:27 +0100, David Woodhouse wrote:
> From: David Woodhouse <dwmw@...zon.co.uk>
> 
> We observed systems going dark on kexec, due to corruption of the new
> kernel's text (and sometimes the initrd). This was eventually determined
> to be caused by the vLPI pending tables used by the GIC in the previous
> kernel, which were not being quiesced properly.

FWIW this is a previous hack we attempted which *didn't* work. (For
illustration only; ignore the syscore .kexec hook. We addressed that
differently in the end with
https://lore.kernel.org/kexec/20231213064004.2419447-1-jgowans@amazon.com/ )

At the point where the its_kexec() hook in this patch has completed, we
poisoned the (ex-) vLPI pending tables and then scanned for corruption
in them. We saw the same characteristic pattern of corruption which had
been breaking the next kernel after kexec: 32 bytes copied from offset
0 to offset 32 in a page, followed by bytes 0, 1, 32, 33, 34, 35 being
zeroed.

Adding a few milliseconds of sleep before the poisoning was enough to
make the problem go away, as is the patch which calls unmap_all_vpes()
for every kvm.

Of course, if the GIC were behind an IOMMU as all DMA-capable devices
should be, this might never have happened...

diff --git a/drivers/irqchip/irq-gic-common.h b/drivers/irqchip/irq-gic-common.h
index f407cce9ecaa..a4fde376d214 100644
--- a/drivers/irqchip/irq-gic-common.h
+++ b/drivers/irqchip/irq-gic-common.h
@@ -19,6 +19,12 @@ struct gic_quirk {
 	u32 mask;
 };
 
+struct redist_region {
+	void __iomem		*redist_base;
+	phys_addr_t		phys_base;
+	bool			single_redist;
+};
+
 int gic_configure_irq(unsigned int irq, unsigned int type,
                        void __iomem *base, void (*sync_access)(void));
 void gic_dist_config(void __iomem *base, int gic_irqs,
@@ -33,4 +39,6 @@ void gic_enable_of_quirks(const struct device_node *np,
 #define RDIST_FLAGS_RD_TABLES_PREALLOCATED     (1 << 1)
 #define RDIST_FLAGS_FORCE_NON_SHAREABLE        (1 << 2)
 
+int gic_iterate_rdists(int (*fn)(struct redist_region *, void __iomem *));
+
 #endif /* _IRQ_GIC_COMMON_H */
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 638f7eb033ad..d106b6ccca8b 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -4902,6 +4902,51 @@ static void its_enable_quirks(struct its_node *its)
 				     its_quirks, its);
 }
 
+static int disable_vpes(struct redist_region *region, void __iomem *ptr)
+{
+	u64 typer;
+	u64 val;
+
+	typer = gic_read_typer(ptr + GICR_TYPER);
+
+	if (!((typer & GICR_TYPER_VLPIS) && (typer & GICR_TYPER_RVPEID)))
+		return 1;
+
+	/* Deactivate any present vPE */
+	its_clear_vpend_valid(ptr + SZ_128K, 0, GICR_VPENDBASER_PendingLast);
+
+	/* Mark the VPE table as invalid */
+	val = gicr_read_vpropbaser(ptr + SZ_128K + GICR_VPROPBASER);
+	val &= ~GICR_VPROPBASER_4_1_VALID;
+	gicr_write_vpropbaser(val, ptr + SZ_128K + GICR_VPROPBASER);
+
+	/* Disable next redistributor */
+	return 1;
+}
+
+static int its_kexec(void)
+{
+	int err = 0, err_return = 0;
+	struct its_node *its;
+
+	raw_spin_lock(&its_lock);
+
+	list_for_each_entry(its, &its_nodes, entry) {
+		err = its_force_quiescent(its->base);
+		if (err) {
+			pr_err("ITS@%pa: failed to quiesce: %d\n",
+			       &its->phys_base, err);
+			err_return = -EBUSY;
+		}
+	}
+
+	gic_iterate_rdists(disable_vpes);
+
+	raw_spin_unlock(&its_lock);
+
+	return err_return;
+}
+
 static int its_save_disable(void)
 {
 	struct its_node *its;
@@ -5001,6 +5046,7 @@ static void its_restore_enable(void)
 static struct syscore_ops its_syscore_ops = {
 	.suspend = its_save_disable,
 	.resume = its_restore_enable,
+	.kexec = its_kexec,
 };
 
 static void __init __iomem *its_map_one(struct resource *res, int *err)
diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index 50143de1791d..2014c5a75a6e 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -46,12 +46,6 @@
 
 #define GIC_IRQ_TYPE_PARTITION	(GIC_IRQ_TYPE_LPI + 1)
 
-struct redist_region {
-	void __iomem		*redist_base;
-	phys_addr_t		phys_base;
-	bool			single_redist;
-};
-
 struct gic_chip_data {
 	struct fwnode_handle	*fwnode;
 	phys_addr_t		dist_phys_base;
@@ -968,7 +962,7 @@ static void __init gic_dist_init(void)
 		gic_write_irouter(affinity, base + GICD_IROUTERnE + i * 8);
 }
 
-static int gic_iterate_rdists(int (*fn)(struct redist_region *, void __iomem *))
+int gic_iterate_rdists(int (*fn)(struct redist_region *, void __iomem *))
 {
 	int ret = -ENODEV;
 	int i;


Download attachment "smime.p7s" of type "application/pkcs7-signature" (5069 bytes)

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ