Message-ID: <20250923174903.76283-17-ada.coupriediaz@arm.com>
Date: Tue, 23 Sep 2025 18:49:03 +0100
From: Ada Couprie Diaz <ada.coupriediaz@....com>
To: linux-arm-kernel@...ts.infradead.org
Cc: Catalin Marinas <catalin.marinas@....com>,
	Will Deacon <will@...nel.org>,
	Marc Zyngier <maz@...nel.org>,
	Oliver Upton <oliver.upton@...ux.dev>,
	Ard Biesheuvel <ardb@...nel.org>,
	Joey Gouly <joey.gouly@....com>,
	Suzuki K Poulose <suzuki.poulose@....com>,
	Zenghui Yu <yuzenghui@...wei.com>,
	Andrey Ryabinin <ryabinin.a.a@...il.com>,
	Alexander Potapenko <glider@...gle.com>,
	Andrey Konovalov <andreyknvl@...il.com>,
	Dmitry Vyukov <dvyukov@...gle.com>,
	Vincenzo Frascino <vincenzo.frascino@....com>,
	linux-kernel@...r.kernel.org,
	kvmarm@...ts.linux.dev,
	kasan-dev@...glegroups.com,
	Mark Rutland <mark.rutland@....com>,
	Ada Couprie Diaz <ada.coupriediaz@....com>
Subject: [RFC PATCH 16/16] arm64/io: rework Cortex-A57 erratum 832075 to use callback

The Cortex-A57 erratum 832075 fix implemented by the kernel
replaces all device memory loads with their load-acquire versions.
By using simple instruction-level alternatives to replace the 13k+
instances of such loads, we add more than 50kB of data
to the `.altinstructions` section, and thus the kernel image.
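
For reference, every alternative site emits a per-site descriptor into the
alternatives data and, for a plain ALTERNATIVE(), a canned 4-byte
replacement instruction as well; a callback site only needs the descriptor,
which is presumably where most of the ~50kB saving below comes from.
A rough sketch of that descriptor, assuming the current layout of
`struct alt_instr` in <asm/alternative-macros.h> (illustration only, not
part of this patch; exact field names and sizes may differ between kernel
versions):

  /* Per-site descriptor; layout assumed, shown for illustration only. */
  struct alt_instr {
          s32 orig_offset;   /* offset to the original instruction(s) */
          s32 alt_offset;    /* offset to the replacement, or to the callback */
          u16 cpucap;        /* cpucap bit gating the patch */
          u8  orig_len;      /* size of the original instruction(s) */
          u8  alt_len;       /* size of the replacement, 0 for callback sites */
  };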

Implement `alt_cb_patch_ldr_to_ldar()` as the alternative callback
to patch LDRs to device memory into LDARs and use it instead
of the alternative instructions.
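
The patching step relies on LDR{,B,H} and LDAR{,B,H} sharing the common
load/store register layout: Rt lives in bits [4:0], Rn in bits [9:5] and
the access size in bits [31:30]. A standalone sketch of that extraction,
with made-up helper names and for illustration only (the patch itself goes
through the aarch64_insn_* helpers so the encoding details stay in one
place):

  /* Illustration only: field positions shared by LDR{,B,H} and LDAR{,B,H}. */
  static inline u32 ldst_rt(u32 insn)   { return insn & 0x1f; }        /* Rt:   bits [4:0]   */
  static inline u32 ldst_rn(u32 insn)   { return (insn >> 5) & 0x1f; } /* Rn:   bits [9:5]   */
  static inline u32 ldst_size(u32 insn) { return insn >> 30; }         /* size: bits [31:30] */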

This lightens the image by around 50kB, as expected, while producing
the same patched instructions.

The new callback is safe to use for alternatives: it is `noinstr`
and the `aarch64_insn_...` functions it relies on have been made safe
in previous commits.

Add `alt_cb_patch_ldr_to_ldar()` to the nVHE namespace, as
`__vgic_v2_perform_cpuif_access()` uses one of the patched functions.

Signed-off-by: Ada Couprie Diaz <ada.coupriediaz@....com>
---
 arch/arm64/include/asm/io.h    | 27 +++++++++++++++------------
 arch/arm64/kernel/image-vars.h |  1 +
 arch/arm64/kernel/io.c         | 21 +++++++++++++++++++++
 3 files changed, 37 insertions(+), 12 deletions(-)

diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h
index 9b96840fb979..ec75bd0a9d76 100644
--- a/arch/arm64/include/asm/io.h
+++ b/arch/arm64/include/asm/io.h
@@ -50,13 +50,16 @@ static __always_inline void __raw_writeq(u64 val, volatile void __iomem *addr)
 	asm volatile("str %x0, %1" : : "rZ" (val), "Qo" (*ptr));
 }
 
+void noinstr alt_cb_patch_ldr_to_ldar(struct alt_instr *alt,
+			       __le32 *origptr, __le32 *updptr, int nr_inst);
+
 #define __raw_readb __raw_readb
 static __always_inline u8 __raw_readb(const volatile void __iomem *addr)
 {
 	u8 val;
-	asm volatile(ALTERNATIVE("ldrb %w0, [%1]",
-				 "ldarb %w0, [%1]",
-				 ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE)
+	asm volatile(ALTERNATIVE_CB("ldrb %w0, [%1]",
+				 ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE,
+				 alt_cb_patch_ldr_to_ldar)
 		     : "=r" (val) : "r" (addr));
 	return val;
 }
@@ -66,9 +69,9 @@ static __always_inline u16 __raw_readw(const volatile void __iomem *addr)
 {
 	u16 val;
 
-	asm volatile(ALTERNATIVE("ldrh %w0, [%1]",
-				 "ldarh %w0, [%1]",
-				 ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE)
+	asm volatile(ALTERNATIVE_CB("ldrh %w0, [%1]",
+				 ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE,
+				 alt_cb_patch_ldr_to_ldar)
 		     : "=r" (val) : "r" (addr));
 	return val;
 }
@@ -77,9 +80,9 @@ static __always_inline u16 __raw_readw(const volatile void __iomem *addr)
 static __always_inline u32 __raw_readl(const volatile void __iomem *addr)
 {
 	u32 val;
-	asm volatile(ALTERNATIVE("ldr %w0, [%1]",
-				 "ldar %w0, [%1]",
-				 ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE)
+	asm volatile(ALTERNATIVE_CB("ldr %w0, [%1]",
+				 ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE,
+				 alt_cb_patch_ldr_to_ldar)
 		     : "=r" (val) : "r" (addr));
 	return val;
 }
@@ -88,9 +91,9 @@ static __always_inline u32 __raw_readl(const volatile void __iomem *addr)
 static __always_inline u64 __raw_readq(const volatile void __iomem *addr)
 {
 	u64 val;
-	asm volatile(ALTERNATIVE("ldr %0, [%1]",
-				 "ldar %0, [%1]",
-				 ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE)
+	asm volatile(ALTERNATIVE_CB("ldr %0, [%1]",
+				 ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE,
+				 alt_cb_patch_ldr_to_ldar)
 		     : "=r" (val) : "r" (addr));
 	return val;
 }
diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h
index 714b0b5ec5ac..43ac41f87229 100644
--- a/arch/arm64/kernel/image-vars.h
+++ b/arch/arm64/kernel/image-vars.h
@@ -91,6 +91,7 @@ KVM_NVHE_ALIAS(spectre_bhb_patch_loop_mitigation_enable);
 KVM_NVHE_ALIAS(spectre_bhb_patch_wa3);
 KVM_NVHE_ALIAS(spectre_bhb_patch_clearbhb);
 KVM_NVHE_ALIAS(alt_cb_patch_nops);
+KVM_NVHE_ALIAS(alt_cb_patch_ldr_to_ldar);
 
 /* Global kernel state accessed by nVHE hyp code. */
 KVM_NVHE_ALIAS(kvm_vgic_global_state);
diff --git a/arch/arm64/kernel/io.c b/arch/arm64/kernel/io.c
index fe86ada23c7d..d4dff119f78c 100644
--- a/arch/arm64/kernel/io.c
+++ b/arch/arm64/kernel/io.c
@@ -9,6 +9,27 @@
 #include <linux/types.h>
 #include <linux/io.h>
 
+noinstr void alt_cb_patch_ldr_to_ldar(struct alt_instr *alt,
+			       __le32 *origptr, __le32 *updptr, int nr_inst)
+{
+	u32 rt, rn, size, orinst, altinst;
+
+	BUG_ON(nr_inst != 1);
+
+	orinst = le32_to_cpu(origptr[0]);
+
+	rt = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RT, orinst);
+	rn = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RN, orinst);
+	/* The size field (31,30) matches the enum used in gen_load_acq below. */
+	size = orinst >> 30;
+
+	altinst = aarch64_insn_gen_load_acq_store_rel(rt, rn, size,
+		AARCH64_INSN_LDST_LOAD_ACQ);
+
+	updptr[0] = cpu_to_le32(altinst);
+}
+EXPORT_SYMBOL(alt_cb_patch_ldr_to_ldar);
+
 /*
  * This generates a memcpy that works on a from/to address which is aligned to
  * bits. Count is in terms of the number of bits sized quantities to copy. It
-- 
2.43.0

