Message-Id: <20240827130829.43632-2-alex.bennee@linaro.org>
Date: Tue, 27 Aug 2024 14:08:27 +0100
From: Alex Bennée <alex.bennee@...aro.org>
To: linux-kernel@...r.kernel.org
Cc: kvm@...r.kernel.org,
	linux-arm-kernel@...ts.infradead.org,
	kvmarm@...ts.linux.dev,
	maz@...nel.org,
	arnd@...aro.org,
	D Scott Phillips <scott@...amperecomputing.com>,
	Alex Bennée <alex.bennee@...aro.org>
Subject: [PATCH 1/3] ampere/arm64: Add a fixup handler for alignment faults in aarch64 code

From: D Scott Phillips <scott@...amperecomputing.com>

A later patch will hand out Device memory in some cases to code
which expects a Normal memory type, as an errata workaround.
Unaligned accesses to Device memory will fault though, so here we
add a fixup handler to emulate faulting accesses, at a performance
penalty.
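
As a rough sketch of the approach (illustration only, not the code this
patch adds; emulate_unaligned_load() is a made-up name), the handler
replays the access one aligned byte at a time, which can never
alignment-fault on Device memory:

	/* assemble an unaligned little-endian load from single-byte reads */
	static u64 emulate_unaligned_load(const volatile u8 *p, int size)
	{
		u64 val = 0;
		int i;

		for (i = 0; i < size; i++)
			val |= (u64)p[i] << (8 * i);
		return val;
	}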

Many of the instructions in the Loads and Stores group are supported,
but these groups are not handled here:

 * Advanced SIMD load/store multiple structures
 * Advanced SIMD load/store multiple structures (post-indexed)
 * Advanced SIMD load/store single structure
 * Advanced SIMD load/store single structure (post-indexed)
 * Load/store memory tags
 * Load/store exclusive
 * LDAPR/STLR (unscaled immediate)
 * Load register (literal) [cannot Alignment fault]
 * Load/store register (unprivileged)
 * Atomic memory operations
 * Load/store register (pac)

Instruction implementations are translated from the Exploration tools'
ASL specifications.

Upstream-Status: Pending
Signed-off-by: D Scott Phillips <scott@...amperecomputing.com>
[AJB: fix align_ldst_regoff_simdfp]
Signed-off-by: Alex Bennée <alex.bennee@...aro.org>

---
v2
  - fix handling of some registers
vAJB:
  - fix align_ldst_regoff_simdfp
    - fix scale calculation (ternary instead of |)
    - don't skip n == t && n != 31 (not relevant to simd/fp)
    - check for invalid option<1>
    - expand opc & 0x2 check to include size
  - add failure pr_warn to fixup_alignment
---
 arch/arm64/include/asm/insn.h |   1 +
 arch/arm64/mm/Makefile        |   3 +-
 arch/arm64/mm/fault.c         | 721 ++++++++++++++++++++++++++++++++++
 arch/arm64/mm/fault_neon.c    |  59 +++
 4 files changed, 783 insertions(+), 1 deletion(-)
 create mode 100644 arch/arm64/mm/fault_neon.c

diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
index 8c0a36f72d6fc..d6e926b5046c1 100644
--- a/arch/arm64/include/asm/insn.h
+++ b/arch/arm64/include/asm/insn.h
@@ -431,6 +431,7 @@ __AARCH64_INSN_FUNCS(clrex,	0xFFFFF0FF, 0xD503305F)
 __AARCH64_INSN_FUNCS(ssbb,	0xFFFFFFFF, 0xD503309F)
 __AARCH64_INSN_FUNCS(pssbb,	0xFFFFFFFF, 0xD503349F)
 __AARCH64_INSN_FUNCS(bti,	0xFFFFFF3F, 0xD503241f)
+__AARCH64_INSN_FUNCS(dc_zva,	0xFFFFFFE0, 0xD50B7420)
 
 #undef	__AARCH64_INSN_FUNCS
 
diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile
index 60454256945b8..05f1ac75e315c 100644
--- a/arch/arm64/mm/Makefile
+++ b/arch/arm64/mm/Makefile
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
-obj-y				:= dma-mapping.o extable.o fault.o init.o \
+obj-y				:= dma-mapping.o extable.o fault.o fault_neon.o init.o \
 				   cache.o copypage.o flush.o \
 				   ioremap.o mmap.o pgd.o mmu.o \
 				   context.o proc.o pageattr.o fixmap.o
@@ -13,5 +13,6 @@ obj-$(CONFIG_DEBUG_VIRTUAL)	+= physaddr.o
 obj-$(CONFIG_ARM64_MTE)		+= mteswap.o
 KASAN_SANITIZE_physaddr.o	+= n
 
+CFLAGS_REMOVE_fault_neon.o += -mgeneral-regs-only
 obj-$(CONFIG_KASAN)		+= kasan_init.o
 KASAN_SANITIZE_kasan_init.o	:= n
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 451ba7cbd5adb..744e7b1664b1c 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -5,6 +5,7 @@
  * Copyright (C) 1995  Linus Torvalds
  * Copyright (C) 1995-2004 Russell King
  * Copyright (C) 2012 ARM Ltd.
+ * Copyright (C) 2020 Ampere Computing LLC
  */
 
 #include <linux/acpi.h>
@@ -42,8 +43,10 @@
 #include <asm/system_misc.h>
 #include <asm/tlbflush.h>
 #include <asm/traps.h>
+#include <asm/patching.h>
 
 struct fault_info {
+	/* fault handler, return 0 on successful handling */
 	int	(*fn)(unsigned long far, unsigned long esr,
 		      struct pt_regs *regs);
 	int	sig;
@@ -693,9 +696,727 @@ static int __kprobes do_translation_fault(unsigned long far,
 	return 0;
 }
 
+static int copy_from_user_io(void *to, const void __user *from, unsigned long n)
+{
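+	/* copy one byte at a time; returns the number of bytes left uncopied (0 on success) */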
+	const u8 __user *src = from;
+	u8 *dest = to;
+
+	for (; n; n--)
+		if (get_user(*dest++, src++))
+			break;
+	return n;
+}
+
+static int copy_to_user_io(void __user *to, const void *from, unsigned long n)
+{
+	const u8 *src = from;
+	u8 __user *dest = to;
+
+	for (; n; n--)
+		if (put_user(*src++, dest++))
+			break;
+	return n;
+}
+
+static int align_load(unsigned long addr, int sz, u64 *out)
+{
+	union {
+		u8 d8;
+		u16 d16;
+		u32 d32;
+		u64 d64;
+		char c[8];
+	} data;
+
+	if (sz != 1 && sz != 2 && sz != 4 && sz != 8)
+		return 1;
+	if (is_ttbr0_addr(addr)) {
+		if (copy_from_user_io(data.c, (const void __user *)addr, sz))
+			return 1;
+	} else
+		memcpy_fromio(data.c, (const void __iomem *)addr, sz);
+	switch (sz) {
+	case 1:
+		*out = data.d8;
+		break;
+	case 2:
+		*out = data.d16;
+		break;
+	case 4:
+		*out = data.d32;
+		break;
+	case 8:
+		*out = data.d64;
+		break;
+	default:
+		return 1;
+	}
+	return 0;
+}
+
+static int align_store(unsigned long addr, int sz, u64 val)
+{
+	union {
+		u8 d8;
+		u16 d16;
+		u32 d32;
+		u64 d64;
+		char c[8];
+	} data;
+
+	switch (sz) {
+	case 1:
+		data.d8 = val;
+		break;
+	case 2:
+		data.d16 = val;
+		break;
+	case 4:
+		data.d32 = val;
+		break;
+	case 8:
+		data.d64 = val;
+		break;
+	default:
+		return 1;
+	}
+	if (is_ttbr0_addr(addr)) {
+		if (copy_to_user_io((void __user *)addr, data.c, sz))
+			return 1;
+	} else
+		memcpy_toio((void __iomem *)addr, data.c, sz);
+	return 0;
+}
+
+static int align_dc_zva(unsigned long addr, struct pt_regs *regs)
+{
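+	/* DCZID_EL0.BS is log2 of the DC ZVA block size in 4-byte words, so the block is 4 << bs bytes */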
+	int bs = read_cpuid(DCZID_EL0) & 0xf;
+	int sz = 1 << (bs + 2);
+
+	addr &= ~(sz - 1);
+	if (is_ttbr0_addr(addr)) {
+		for (; sz; sz--, addr++) {
+			if (align_store(addr, 1, 0))
+				return 1;
+		}
+	} else
+		memset_io((void __iomem *)addr, 0, sz);
+	return 0;
+}
+
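+/*
+ * Read or write one 64-bit half (d[0]/d[1]) of SIMD&FP register Vn.
+ * Implemented in fault_neon.c, which is built without -mgeneral-regs-only.
+ */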
+extern u64 __arm64_get_vn_dt(int n, int t);
+extern void __arm64_set_vn_dt(int n, int t, u64 val);
+
+#define get_vn_dt __arm64_get_vn_dt
+#define set_vn_dt __arm64_set_vn_dt
+
+static int align_ldst_pair(u32 insn, struct pt_regs *regs)
+{
+	const u32 OPC = GENMASK(31, 30);
+	const u32 L_MASK = BIT(22);
+
+	int opc = FIELD_GET(OPC, insn);
+	int L = FIELD_GET(L_MASK, insn);
+
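+	/* bits 24:23 encode the pair addressing mode: 01 post-indexed, 10 signed offset, 11 pre-indexed */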
+	bool wback = !!(insn & BIT(23));
+	bool postindex = !(insn & BIT(24));
+
+	int n = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RN, insn);
+	int t = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RT, insn);
+	int t2 = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RT2, insn);
+	bool is_store = !L;
+	bool is_signed = !!(opc & 1);
+	int scale = 2 + (opc >> 1);
+	int datasize = 8 << scale;
+	u64 uoffset = aarch64_insn_decode_immediate(AARCH64_INSN_IMM_7, insn);
+	s64 offset = sign_extend64(uoffset, 6) << scale;
+	u64 address;
+	u64 data1, data2;
+	u64 dbytes;
+
+	if ((is_store && (opc & 1)) || opc == 3)
+		return 1;
+
+	if (wback && (t == n || t2 == n) && n != 31)
+		return 1;
+
+	if (!is_store && t == t2)
+		return 1;
+
+	dbytes = datasize / 8;
+
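+	/* regs_get_register() takes a byte offset into pt_regs: x<n> sits at n * 8 and n == 31 yields sp */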
+	address = regs_get_register(regs, n << 3);
+
+	if (!postindex)
+		address += offset;
+
+	if (is_store) {
+		data1 = pt_regs_read_reg(regs, t);
+		data2 = pt_regs_read_reg(regs, t2);
+		if (align_store(address, dbytes, data1) ||
+		    align_store(address + dbytes, dbytes, data2))
+			return 1;
+	} else {
+		if (align_load(address, dbytes, &data1) ||
+		    align_load(address + dbytes, dbytes, &data2))
+			return 1;
+		if (is_signed) {
+			data1 = sign_extend64(data1, datasize - 1);
+			data2 = sign_extend64(data2, datasize - 1);
+		}
+		pt_regs_write_reg(regs, t, data1);
+		pt_regs_write_reg(regs, t2, data2);
+	}
+
+	if (wback) {
+		if (postindex)
+			address += offset;
+		if (n == 31)
+			regs->sp = address;
+		else
+			pt_regs_write_reg(regs, n, address);
+	}
+
+	return 0;
+}
+
+static int align_ldst_pair_simdfp(u32 insn, struct pt_regs *regs)
+{
+	const u32 OPC = GENMASK(31, 30);
+	const u32 L_MASK = BIT(22);
+
+	int opc = FIELD_GET(OPC, insn);
+	int L = FIELD_GET(L_MASK, insn);
+
+	bool wback = !!(insn & BIT(23));
+	bool postindex = !(insn & BIT(24));
+
+	int n = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RN, insn);
+	int t = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RT, insn);
+	int t2 = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RT2, insn);
+	bool is_store = !L;
+	int scale = 2 + opc;
+	int datasize = 8 << scale;
+	u64 uoffset = aarch64_insn_decode_immediate(AARCH64_INSN_IMM_7, insn);
+	s64 offset = sign_extend64(uoffset, 6) << scale;
+	u64 address;
+	u64 data1_d0, data1_d1, data2_d0, data2_d1;
+	u64 dbytes;
+
+	if (opc == 0x3)
+		return 1;
+
+	if (!is_store && t == t2)
+		return 1;
+
+	dbytes = datasize / 8;
+
+	address = regs_get_register(regs, n << 3);
+
+	if (!postindex)
+		address += offset;
+
+	if (is_store) {
+		data1_d0 = get_vn_dt(t, 0);
+		data2_d0 = get_vn_dt(t2, 0);
+		if (datasize == 128) {
+			data1_d1 = get_vn_dt(t, 1);
+			data2_d1 = get_vn_dt(t2, 1);
+			if (align_store(address, 8, data1_d0) ||
+			    align_store(address + 8, 8, data1_d1) ||
+			    align_store(address + 16, 8, data2_d0) ||
+			    align_store(address + 24, 8, data2_d1))
+				return 1;
+		} else {
+			if (align_store(address, dbytes, data1_d0) ||
+			    align_store(address + dbytes, dbytes, data2_d0))
+				return 1;
+		}
+	} else {
+		if (datasize == 128) {
+			if (align_load(address, 8, &data1_d0) ||
+			    align_load(address + 8, 8, &data1_d1) ||
+			    align_load(address + 16, 8, &data2_d0) ||
+			    align_load(address + 24, 8, &data2_d1))
+				return 1;
+		} else {
+			if (align_load(address, dbytes, &data1_d0) ||
+			    align_load(address + dbytes, dbytes, &data2_d0))
+				return 1;
+			data1_d1 = data2_d1 = 0;
+		}
+		set_vn_dt(t, 0, data1_d0);
+		set_vn_dt(t, 1, data1_d1);
+		set_vn_dt(t2, 0, data2_d0);
+		set_vn_dt(t2, 1, data2_d1);
+	}
+
+	if (wback) {
+		if (postindex)
+			address += offset;
+		if (n == 31)
+			regs->sp = address;
+		else
+			pt_regs_write_reg(regs, n, address);
+	}
+
+	return 0;
+}
+
+static int align_ldst_regoff(u32 insn, struct pt_regs *regs)
+{
+	const u32 SIZE = GENMASK(31, 30);
+	const u32 OPC = GENMASK(23, 22);
+	const u32 OPTION = GENMASK(15, 13);
+	const u32 S = BIT(12);
+
+	u32 size = FIELD_GET(SIZE, insn);
+	u32 opc = FIELD_GET(OPC, insn);
+	u32 option = FIELD_GET(OPTION, insn);
+	u32 s = FIELD_GET(S, insn);
+	int scale = size;
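+	/* option<0> selects a W (32-bit) vs X (64-bit) offset register, option<2> a signed extend */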
+	int extend_len = (option & 0x1) ? 64 : 32;
+	bool extend_unsigned = !(option & 0x4);
+	int shift = s ? scale : 0;
+
+	int n = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RN, insn);
+	int t = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RT, insn);
+	int m = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RM, insn);
+	bool is_store;
+	bool is_signed;
+	int regsize;
+	int datasize;
+	u64 offset;
+	u64 address;
+	u64 data;
+
+	if ((opc & 0x2) == 0) {
+		/* store or zero-extending load */
+		is_store = !(opc & 0x1);
+		regsize = size == 0x3 ? 64 : 32;
+		is_signed = false;
+	} else {
+		if (size == 0x3) {
+			if ((opc & 0x1) == 0) {
+				/* prefetch */
+				return 0;
+			} else {
+				/* undefined */
+				return 1;
+			}
+		} else {
+			/* sign-extending load */
+			is_store = false;
+			if (size == 0x2 && (opc & 0x1) == 0x1) {
+				/* undefined */
+				return 1;
+			}
+			regsize = (opc & 0x1) == 0x1 ? 32 : 64;
+			is_signed = true;
+		}
+	}
+
+	datasize = 8 << scale;
+
+	if (n == t && n != 31)
+		return 1;
+
+	offset = pt_regs_read_reg(regs, m);
+	if (extend_len == 32) {
+		offset &= (u32)~0;
+		if (!extend_unsigned)
+			offset = sign_extend64(offset, 31);
+	}
+	offset <<= shift;
+
+	address = regs_get_register(regs, n << 3) + offset;
+
+	if (is_store) {
+		data = pt_regs_read_reg(regs, t);
+		if (align_store(address, datasize / 8, data))
+			return 1;
+	} else {
+		if (align_load(address, datasize / 8, &data))
+			return 1;
+		if (is_signed) {
+			if (regsize == 32)
+				data = sign_extend32(data, datasize - 1);
+			else
+				data = sign_extend64(data, datasize - 1);
+		}
+		pt_regs_write_reg(regs, t, data);
+	}
+
+	return 0;
+}
+
+static int align_ldst_regoff_simdfp(u32 insn, struct pt_regs *regs)
+{
+	const u32 SIZE = GENMASK(31, 30);
+	const u32 OPC = GENMASK(23, 22);
+	const u32 OPTION = GENMASK(15, 13);
+	const u32 S = BIT(12);
+
+	u32 size = FIELD_GET(SIZE, insn);
+	u32 opc = FIELD_GET(OPC, insn);
+	u32 option = FIELD_GET(OPTION, insn);
+	u32 s = FIELD_GET(S, insn);
+	/* this elides the 8/16 bit sign extensions */
+	int extend_len = (option & 0x1) ? 64 : 32;
+	bool extend_unsigned = !(option & 0x4);
+
+	int n = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RN, insn);
+	int t = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RT, insn);
+	int m = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RM, insn);
+	bool is_store = !(opc & BIT(0));
+	int scale;
+	int shift;
+	int datasize;
+	u64 offset;
+	u64 address;
+	u64 data_d0, data_d1;
+
+	/* if option<1> == '0' then UNDEFINED; // sub-word index */
+	if ((option & 0x2) == 0) {
+		pr_warn("option<1> == 0 is UNDEFINED\n");
+		return 1;
+	}
+
+	/* if opc<1> == '1' && size != '00' then UNDEFINED;*/
+	if ((opc & 0x2) && size != 0b00) {
+		pr_warn("opc<1> == '1' && size != '00' is UNDEFINED\n");
+		return 1;
+	}
+
+	/*
+	 * constant integer scale = if opc<1> == '1' then 4 else UInt(size);
+	 */
+	scale = opc & 0x2 ? 4 : size;
+	shift = s ? scale : 0;
+
+	datasize = 8 << scale;
+
+	offset = pt_regs_read_reg(regs, m);
+	if (extend_len == 32) {
+		offset &= (u32)~0;
+		if (!extend_unsigned)
+			offset = sign_extend64(offset, 31);
+	}
+	offset <<= shift;
+
+	address = regs_get_register(regs, n << 3) + offset;
+
+	if (is_store) {
+		data_d0 = get_vn_dt(t, 0);
+		if (datasize == 128) {
+			data_d1 = get_vn_dt(t, 1);
+			if (align_store(address, 8, data_d0) ||
+			    align_store(address + 8, 8, data_d1))
+				return 1;
+		} else {
+			if (align_store(address, datasize / 8, data_d0))
+				return 1;
+		}
+	} else {
+		if (datasize == 128) {
+			if (align_load(address, 8, &data_d0) ||
+			    align_load(address + 8, 8, &data_d1))
+				return 1;
+		} else {
+			if (align_load(address, datasize / 8, &data_d0))
+				return 1;
+			data_d1 = 0;
+		}
+		set_vn_dt(t, 0, data_d0);
+		set_vn_dt(t, 1, data_d1);
+	}
+
+	return 0;
+}
+
+static int align_ldst_imm(u32 insn, struct pt_regs *regs)
+{
+	const u32 SIZE = GENMASK(31, 30);
+	const u32 OPC = GENMASK(23, 22);
+
+	u32 size = FIELD_GET(SIZE, insn);
+	u32 opc = FIELD_GET(OPC, insn);
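+	/* bit 24 set is the unsigned-offset form (no writeback); otherwise bits 11:10 pick post- vs pre-indexed */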
+	bool wback = !(insn & BIT(24)) && !!(insn & BIT(10));
+	bool postindex = wback && !(insn & BIT(11));
+	int scale = size;
+	u64 offset;
+
+	int n = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RN, insn);
+	int t = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RT, insn);
+	bool is_store;
+	bool is_signed;
+	int regsize;
+	int datasize;
+	u64 address;
+	u64 data;
+
+	if (!(insn & BIT(24))) {
+		u64 uoffset =
+			aarch64_insn_decode_immediate(AARCH64_INSN_IMM_9, insn);
+		offset = sign_extend64(uoffset, 8);
+	} else {
+		offset = aarch64_insn_decode_immediate(AARCH64_INSN_IMM_12, insn);
+		offset <<= scale;
+	}
+
+	if ((opc & 0x2) == 0) {
+		/* store or zero-extending load */
+		is_store = !(opc & 0x1);
+		regsize = size == 0x3 ? 64 : 32;
+		is_signed = false;
+	} else {
+		if (size == 0x3) {
+			if (FIELD_GET(GENMASK(11, 10), insn) == 0 && (opc & 0x1) == 0) {
+				/* prefetch */
+				return 0;
+			} else {
+				/* undefined */
+				return 1;
+			}
+		} else {
+			/* sign-extending load */
+			is_store = false;
+			if (size == 0x2 && (opc & 0x1) == 0x1) {
+				/* undefined */
+				return 1;
+			}
+			regsize = (opc & 0x1) == 0x1 ? 32 : 64;
+			is_signed = true;
+		}
+	}
+
+	datasize = 8 << scale;
+
+	if (n == t && n != 31)
+		return 1;
+
+	address = regs_get_register(regs, n << 3);
+
+	if (!postindex)
+		address += offset;
+
+	if (is_store) {
+		data = pt_regs_read_reg(regs, t);
+		if (align_store(address, datasize / 8, data))
+			return 1;
+	} else {
+		if (align_load(address, datasize / 8, &data))
+			return 1;
+		if (is_signed) {
+			if (regsize == 32)
+				data = sign_extend32(data, datasize - 1);
+			else
+				data = sign_extend64(data, datasize - 1);
+		}
+		pt_regs_write_reg(regs, t, data);
+	}
+
+	if (wback) {
+		if (postindex)
+			address += offset;
+		if (n == 31)
+			regs->sp = address;
+		else
+			pt_regs_write_reg(regs, n, address);
+	}
+
+	return 0;
+}
+
+static int align_ldst_imm_simdfp(u32 insn, struct pt_regs *regs)
+{
+	const u32 SIZE = GENMASK(31, 30);
+	const u32 OPC = GENMASK(23, 22);
+
+	u32 size = FIELD_GET(SIZE, insn);
+	u32 opc = FIELD_GET(OPC, insn);
+	bool wback = !(insn & BIT(24)) && !!(insn & BIT(10));
+	bool postindex = wback && !(insn & BIT(11));
+	int scale = (opc & 0x2) << 1 | size;
+	u64 offset;
+
+	int n = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RN, insn);
+	int t = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RT, insn);
+	bool is_store = !(opc & BIT(0));
+	int datasize;
+	u64 address;
+	u64 data_d0, data_d1;
+
+	if (scale > 4)
+		return 1;
+
+	if (!(insn & BIT(24))) {
+		u64 uoffset =
+			aarch64_insn_decode_immediate(AARCH64_INSN_IMM_9, insn);
+		offset = sign_extend64(uoffset, 8);
+	} else {
+		offset = aarch64_insn_decode_immediate(AARCH64_INSN_IMM_12, insn);
+		offset <<= scale;
+	}
+
+	datasize = 8 << scale;
+
+	address = regs_get_register(regs, n << 3);
+
+	if (!postindex)
+		address += offset;
+
+	if (is_store) {
+		data_d0 = get_vn_dt(t, 0);
+		if (datasize == 128) {
+			data_d1 = get_vn_dt(t, 1);
+			if (align_store(address, 8, data_d0) ||
+			    align_store(address + 8, 8, data_d1))
+				return 1;
+		} else {
+			if (align_store(address, datasize / 8, data_d0))
+				return 1;
+		}
+	} else {
+		if (datasize == 128) {
+			if (align_load(address, 8, &data_d0) ||
+			    align_load(address + 8, 8, &data_d1))
+				return 1;
+		} else {
+			if (align_load(address, datasize / 8, &data_d0))
+				return 1;
+			data_d1 = 0;
+		}
+		set_vn_dt(t, 0, data_d0);
+		set_vn_dt(t, 1, data_d1);
+	}
+
+	if (wback) {
+		if (postindex)
+			address += offset;
+		if (n == 31)
+			regs->sp = address;
+		else
+			pt_regs_write_reg(regs, n, address);
+	}
+
+	return 0;
+}
+
+static int align_ldst(u32 insn, struct pt_regs *regs)
+{
+	const u32 op0 = FIELD_GET(GENMASK(31, 28), insn);
+	const u32 op1 = FIELD_GET(BIT(26), insn);
+	const u32 op2 = FIELD_GET(GENMASK(24, 23), insn);
+	const u32 op3 = FIELD_GET(GENMASK(21, 16), insn);
+	const u32 op4 = FIELD_GET(GENMASK(11, 10), insn);
+
+	if ((op0 & 0x3) == 0x2) {
+		/*
+		 * |------+-----+-----+-----+-----+-----------------------------------------|
+		 * | op0  | op1 | op2 | op3 | op4 | Decode group                            |
+		 * |------+-----+-----+-----+-----+-----------------------------------------|
+		 * | xx10 | -   |  00 | -   | -   | Load/store no-allocate pair (offset)    |
+		 * | xx10 | -   |  01 | -   | -   | Load/store register pair (post-indexed) |
+		 * | xx10 | -   |  10 | -   | -   | Load/store register pair (offset)       |
+		 * | xx10 | -   |  11 | -   | -   | Load/store register pair (pre-indexed)  |
+		 * |------+-----+-----+-----+-----+-----------------------------------------|
+		 */
+
+		if (op1 == 0) { /* V == 0 */
+			/* general */
+			return align_ldst_pair(insn, regs);
+		} else {
+			/* simdfp */
+			return align_ldst_pair_simdfp(insn, regs);
+		}
+	} else if ((op0 & 0x3) == 0x3 &&
+		   (((op2 & 0x2) == 0 && (op3 & 0x20) == 0 && op4 != 0x2) ||
+		    ((op2 & 0x2) == 0x2))) {
+		/*
+		 * |------+-----+-----+--------+-----+----------------------------------------------|
+		 * | op0  | op1 | op2 |    op3 | op4 | Decode group                                 |
+		 * |------+-----+-----+--------+-----+----------------------------------------------|
+		 * | xx11 | -   |  0x | 0xxxxx |  00 | Load/store register (unscaled immediate)     |
+		 * | xx11 | -   |  0x | 0xxxxx |  01 | Load/store register (immediate post-indexed) |
+		 * | xx11 | -   |  0x | 0xxxxx |  11 | Load/store register (immediate pre-indexed)  |
+		 * | xx11 | -   |  1x |      - |   - | Load/store register (unsigned immediate)     |
+		 * |------+-----+-----+--------+-----+----------------------------------------------|
+		 */
+
+		if (op1 == 0) {  /* V == 0 */
+			/* general */
+			return align_ldst_imm(insn, regs);
+		} else {
+			/* simdfp */
+			return align_ldst_imm_simdfp(insn, regs);
+		}
+	} else if ((op0 & 0x3) == 0x3 && (op2 & 0x2) == 0 &&
+		   (op3 & 0x20) == 0x20 && op4 == 0x2) {
+		/*
+		 * |------+-----+-----+--------+-----+---------------------------------------|
+		 * | op0  | op1 | op2 |    op3 | op4 |                                       |
+		 * |------+-----+-----+--------+-----+---------------------------------------|
+		 * | xx11 | -   |  0x | 1xxxxx |  10 | Load/store register (register offset) |
+		 * |------+-----+-----+--------+-----+---------------------------------------|
+		 */
+		if (op1 == 0) { /* V == 0 */
+			/* general */
+			return align_ldst_regoff(insn, regs);
+		} else {
+			/* simdfp */
+			return align_ldst_regoff_simdfp(insn, regs);
+		}
+	} else
+		return 1;
+}
+
+static int fixup_alignment(unsigned long addr, unsigned int esr,
+			   struct pt_regs *regs)
+{
+	u32 insn;
+	int res;
+
+	if (user_mode(regs)) {
+		__le32 insn_le;
+
+		if (!is_ttbr0_addr(addr))
+			return 1;
+
+		if (get_user(insn_le,
+			     (__le32 __user *)instruction_pointer(regs)))
+			return 1;
+		insn = le32_to_cpu(insn_le);
+	} else {
+		if (aarch64_insn_read((void *)instruction_pointer(regs), &insn))
+			return 1;
+	}
+
+	if (aarch64_insn_is_class_branch_sys(insn)) {
+		if (aarch64_insn_is_dc_zva(insn))
+			res = align_dc_zva(addr, regs);
+		else
+			res = 1;
+	} else if (((insn >> 25) & 0x5) == 0x4) {
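+		/* insn[28:25] == x1x0: the Loads and Stores encoding group */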
+		res = align_ldst(insn, regs);
+	} else {
+		res = 1;
+	}
+
+	if (!res)
+		instruction_pointer_set(regs, instruction_pointer(regs) + 4);
+	else
+		pr_warn("%s: failed to fixup 0x%08x\n", __func__, insn);
+
+	return res;
+}
+
 static int do_alignment_fault(unsigned long far, unsigned long esr,
 			      struct pt_regs *regs)
 {
+#ifdef CONFIG_ALTRA_ERRATUM_82288
+	if (!fixup_alignment(far, esr, regs))
+		return 0;
+#endif
 	if (IS_ENABLED(CONFIG_COMPAT_ALIGNMENT_FIXUPS) &&
 	    compat_user_mode(regs))
 		return do_compat_alignment_fixup(far, regs);
diff --git a/arch/arm64/mm/fault_neon.c b/arch/arm64/mm/fault_neon.c
new file mode 100644
index 0000000000000..d5319ed07d89b
--- /dev/null
+++ b/arch/arm64/mm/fault_neon.c
@@ -0,0 +1,59 @@
+/*
+ * These functions need AdvSIMD registers, which Clang rejects in normal
+ * kernel code because it is built with -mgeneral-regs-only. GCC happens
+ * to accept them regardless, but to keep this portable they are moved
+ * into their own translation unit. That lets us drop -mgeneral-regs-only
+ * for this file alone (where it should be harmless) without risking the
+ * compiler doing the wrong thing in places where we don't want it to.
+ *
+ * Otherwise this is identical to the original patch.
+ *
+ * -- q66 <q66@...mera-linux.org>
+ *
+ */
+
+#include <linux/types.h>
+
+u64 __arm64_get_vn_dt(int n, int t)
+{
+	u64 res;
+
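+	/* t selects which 64-bit half of Vn to access: 0 -> d[0], non-zero -> d[1] */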
+	switch (n) {
+#define V(n)						\
+	case n:						\
+		asm("cbnz %w1, 1f\n\t"			\
+		    "mov %0, v"#n".d[0]\n\t"		\
+		    "b 2f\n\t"				\
+		    "1: mov %0, v"#n".d[1]\n\t"		\
+		    "2:" : "=r" (res) : "r" (t));	\
+		break
+	V( 0); V( 1); V( 2); V( 3); V( 4); V( 5); V( 6); V( 7);
+	V( 8); V( 9); V(10); V(11); V(12); V(13); V(14); V(15);
+	V(16); V(17); V(18); V(19); V(20); V(21); V(22); V(23);
+	V(24); V(25); V(26); V(27); V(28); V(29); V(30); V(31);
+#undef V
+	default:
+		res = 0;
+		break;
+	}
+	return res;
+}
+
+void __arm64_set_vn_dt(int n, int t, u64 val)
+{
+	switch (n) {
+#define V(n)						\
+	case n:						\
+		asm("cbnz %w1, 1f\n\t"			\
+		    "mov v"#n".d[0], %0\n\t"		\
+		    "b 2f\n\t"				\
+		    "1: mov v"#n".d[1], %0\n\t"		\
+		    "2:" :: "r" (val), "r" (t));	\
+		break
+	V( 0); V( 1); V( 2); V( 3); V( 4); V( 5); V( 6); V( 7);
+	V( 8); V( 9); V(10); V(11); V(12); V(13); V(14); V(15);
+	V(16); V(17); V(18); V(19); V(20); V(21); V(22); V(23);
+	V(24); V(25); V(26); V(27); V(28); V(29); V(30); V(31);
+#undef V
+	default:
+		break;
+	}
+}
-- 
2.39.2

