[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <c142f447c59861f3c94b0fea7f055f4ff201fa98.camel@infradead.org>
Date: Mon, 23 Jun 2025 18:38:46 +0200
From: David Woodhouse <dwmw2@...radead.org>
To: Marc Zyngier <maz@...nel.org>, Oliver Upton <oliver.upton@...ux.dev>,
Joey Gouly <joey.gouly@....com>, Suzuki K Poulose <suzuki.poulose@....com>,
Zenghui Yu <yuzenghui@...wei.com>, Catalin Marinas
<catalin.marinas@....com>, Will Deacon <will@...nel.org>, Paolo Bonzini
<pbonzini@...hat.com>, Sebastian Ott <sebott@...hat.com>, Andre Przywara
<andre.przywara@....com>, Thorsten Blum <thorsten.blum@...ux.dev>, Shameer
Kolothum <shameerali.kolothum.thodi@...wei.com>,
linux-arm-kernel@...ts.infradead.org, kvmarm@...ts.linux.dev,
linux-kernel@...r.kernel.org, kvm@...r.kernel.org
Subject: Re: [RFC PATCH 2/2] KVM: arm64: vgic-its: Unmap all vPEs on shutdown
On Mon, 2025-06-23 at 14:27 +0100, David Woodhouse wrote:
> From: David Woodhouse <dwmw@...zon.co.uk>
>
> We observed systems going dark on kexec, due to corruption of the new
> kernel's text (and sometimes the initrd). This was eventually determined
> to be caused by the vLPI pending tables used by the GIC in the previous
> kernel, which were not being quiesced properly.
FWIW this is a previous hack we attempted which *didn't* work. (For
illustration only; ignore the syscore .kexec hook. We addressed that
differently in the end with
https://lore.kernel.org/kexec/20231213064004.2419447-1-jgowans@amazon.com/ )
At the point where the its_kexec() hook in this patch has completed, we
poisoned the (ex-) vLPI pending tables and then scanned for corruption
in them. We saw the same characteristic pattern of corruption which had
been breaking the next kernel after kexec: 32 bytes copied from offset
0 to offset 32 in a page, followed by bytes 0, 1, 32, 33, 34, 35 being
zeroed.
Adding a few milliseconds of sleep before the poisoning was enough to
make the problem go away. As is the patch which calls unmap_all_vpes()
for every kvm.
Of course, if the GIC were behind an IOMMU as all DMA-capable devices
should be, this might never have happened...
diff --git a/drivers/irqchip/irq-gic-common.h b/drivers/irqchip/irq-gic-common.h
index f407cce9ecaa..a4fde376d214 100644
--- a/drivers/irqchip/irq-gic-common.h
+++ b/drivers/irqchip/irq-gic-common.h
@@ -19,6 +19,12 @@ struct gic_quirk {
u32 mask;
};
+struct redist_region {
+ void __iomem *redist_base;
+ phys_addr_t phys_base;
+ bool single_redist;
+};
+
int gic_configure_irq(unsigned int irq, unsigned int type,
void __iomem *base, void (*sync_access)(void));
void gic_dist_config(void __iomem *base, int gic_irqs,
@@ -33,4 +39,6 @@ void gic_enable_of_quirks(const struct device_node *np,
#define RDIST_FLAGS_RD_TABLES_PREALLOCATED (1 << 1)
#define RDIST_FLAGS_FORCE_NON_SHAREABLE (1 << 2)
+int gic_iterate_rdists(int (*fn)(struct redist_region *, void __iomem *));
+
#endif /* _IRQ_GIC_COMMON_H */
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 638f7eb033ad..d106b6ccca8b 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -4902,6 +4902,51 @@ static void its_enable_quirks(struct its_node *its)
its_quirks, its);
}
+static int disable_vpes(struct redist_region *region, void __iomem *ptr)
+{
+ u64 typer;
+ u64 val;
+
+ typer = gic_read_typer(ptr + GICR_TYPER);
+
+ if (!((typer & GICR_TYPER_VLPIS) && (typer & GICR_TYPER_RVPEID)))
+ return 1;
+
+ /* Deactivate any present vPE */
+ its_clear_vpend_valid(ptr + SZ_128K, 0, GICR_VPENDBASER_PendingLast);
+
+ /* Mark the VPE table as invalid */
+ val = gicr_read_vpropbaser(ptr + SZ_128K + GICR_VPROPBASER);
+ val &= ~GICR_VPROPBASER_4_1_VALID;
+ gicr_write_vpropbaser(val, ptr + SZ_128K + GICR_VPROPBASER);
+
+ /* Disable next redistributor */
+ return 1;
+}
+
+static int its_kexec(void)
+{
+ int err = 0, err_return = 0;
+ struct its_node *its;
+
+ raw_spin_lock(&its_lock);
+
+ list_for_each_entry(its, &its_nodes, entry) {
+ err = its_force_quiescent(its->base);
+ if (err) {
+ pr_err("ITS@%pa: failed to quiesce: %d\n",
+ &its->phys_base, err);
+ err_return = -EBUSY;
+ }
+ }
+
+ gic_iterate_rdists(disable_vpes);
+
+ raw_spin_unlock(&its_lock);
+
+ return err_return;
+}
+
static int its_save_disable(void)
{
struct its_node *its;
@@ -5001,6 +5046,7 @@ static void its_restore_enable(void)
static struct syscore_ops its_syscore_ops = {
.suspend = its_save_disable,
.resume = its_restore_enable,
+ .kexec = its_kexec,
};
static void __init __iomem *its_map_one(struct resource *res, int *err)
diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index 50143de1791d..2014c5a75a6e 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -46,12 +46,6 @@
#define GIC_IRQ_TYPE_PARTITION (GIC_IRQ_TYPE_LPI + 1)
-struct redist_region {
- void __iomem *redist_base;
- phys_addr_t phys_base;
- bool single_redist;
-};
-
struct gic_chip_data {
struct fwnode_handle *fwnode;
phys_addr_t dist_phys_base;
@@ -968,7 +962,7 @@ static void __init gic_dist_init(void)
gic_write_irouter(affinity, base + GICD_IROUTERnE + i * 8);
}
-static int gic_iterate_rdists(int (*fn)(struct redist_region *, void __iomem *))
+int gic_iterate_rdists(int (*fn)(struct redist_region *, void __iomem *))
{
int ret = -ENODEV;
int i;
Download attachment "smime.p7s" of type "application/pkcs7-signature" (5069 bytes)
Powered by blists - more mailing lists