Message-Id: <20250913232404.2690431-5-kees@kernel.org>
Date: Sat, 13 Sep 2025 16:24:01 -0700
From: Kees Cook <kees@...nel.org>
To: Qing Zhao <qing.zhao@...cle.com>
Cc: Kees Cook <kees@...nel.org>,
Andrew Pinski <pinskia@...il.com>,
Jakub Jelinek <jakub@...hat.com>,
Martin Uecker <uecker@...raz.at>,
Richard Biener <rguenther@...e.de>,
Joseph Myers <josmyers@...hat.com>,
Peter Zijlstra <peterz@...radead.org>,
Jan Hubicka <hubicka@....cz>,
Richard Earnshaw <richard.earnshaw@....com>,
Richard Sandiford <richard.sandiford@....com>,
Marcus Shawcroft <marcus.shawcroft@....com>,
Kyrylo Tkachov <kyrylo.tkachov@....com>,
Kito Cheng <kito.cheng@...il.com>,
Palmer Dabbelt <palmer@...belt.com>,
Andrew Waterman <andrew@...ive.com>,
Jim Wilson <jim.wilson.gcc@...il.com>,
Dan Li <ashimida.1990@...il.com>,
Sami Tolvanen <samitolvanen@...gle.com>,
Ramon de C Valle <rcvalle@...gle.com>,
Joao Moreira <joao@...rdrivepizza.com>,
Nathan Chancellor <nathan@...nel.org>,
Bill Wendling <morbo@...gle.com>,
gcc-patches@....gnu.org,
linux-hardening@...r.kernel.org
Subject: [PATCH v3 5/7] arm: Add ARM 32-bit Kernel Control Flow Integrity implementation

Implement the ARM 32-bit KCFI backend, supporting ARMv7+:

- Use movw/movt instructions for loading 32-bit immediates.
- Support trap debugging through the UDF instruction's immediate
  encoding, following the AArch64 BRK pattern of encoding the
  registers with useful contents.
- Allocate r0/r1 as scratch registers, following the ARM procedure
  call standard for caller-saved temporaries, though they are spilled
  to the stack due to register pressure. IP (r12) is not usable here
  because the register allocator regularly uses it as the branch
  target register.

Assembly Code Pattern for ARM 32-bit:

  push {r0, r1}          ; Spill r0, r1
  ldr r0, [target, #-4]  ; Load actual type ID from preamble
  movw r1, #type_id_low  ; Load expected type (lower 16 bits)
  movt r1, #type_id_high ; Load expected type (upper 16 bits)
  cmp r0, r1             ; Compare type IDs directly
  pop {r0, r1}           ; Reload r0, r1
  beq .Lkcfi_call        ; Branch if type IDs match
.Lkcfi_trap:  udf #udf_value  ; Undefined instruction trap with encoding
.Lkcfi_call:  blx/bx target   ; Execute validated indirect transfer
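
As an aside (illustrative only, not part of the patch), the movw/movt
pair above simply rebuilds the 32-bit type ID from two 16-bit halves.
A minimal C sketch of that split, using an arbitrary example value:

  #include <assert.h>
  #include <stdint.h>

  int
  main (void)
  {
    uint32_t type_id = 0xdeadbeefu;           /* arbitrary example ID */
    uint16_t lo = type_id & 0xffffu;          /* movw operand */
    uint16_t hi = (type_id >> 16) & 0xffffu;  /* movt operand */

    /* movt writes the top half, preserving the low half set by movw.  */
    uint32_t rebuilt = ((uint32_t) hi << 16) | lo;
    assert (rebuilt == type_id);
    return 0;
  }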

UDF Immediate Encoding (following AArch64 ESR pattern; a small
encode/decode sketch in C follows this list):

- UDF instruction immediate encoding format:
  0x8000 | ((ExpectedTypeReg & 31) << 5) | (TargetAddrReg & 31)
- ExpectedTypeReg indicates which register contains the expected type
  (R12 = 12).
- TargetAddrReg indicates which register contains the target address
  (0-15).
- Example: udf #33154 (0x8182) = expected type in R12, target address
  in R2.
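
For illustration only (not part of the patch), here is a minimal,
self-contained C sketch of the encode/decode arithmetic described
above; the helper names are hypothetical and exist only for this
example:

  #include <assert.h>
  #include <stdint.h>
  #include <stdio.h>

  static uint32_t
  kcfi_udf_encode (unsigned type_reg, unsigned addr_reg)
  {
    /* 0x8000 | ((ExpectedTypeReg & 31) << 5) | (TargetAddrReg & 31) */
    return 0x8000u | ((type_reg & 31u) << 5) | (addr_reg & 31u);
  }

  static void
  kcfi_udf_decode (uint32_t imm, unsigned *type_reg, unsigned *addr_reg)
  {
    *type_reg = (imm >> 5) & 31u;
    *addr_reg = imm & 31u;
  }

  int
  main (void)
  {
    unsigned t, a;
    uint32_t imm = kcfi_udf_encode (12, 2);  /* type in R12, target in R2 */

    kcfi_udf_decode (imm, &t, &a);
    assert (imm == 0x8182u && t == 12 && a == 2);
    printf ("udf #%u (0x%04x): type reg r%u, target reg r%u\n",
            (unsigned) imm, (unsigned) imm, t, a);
    return 0;
  }

Compiling and running this prints "udf #33154 (0x8182): type reg r12,
target reg r2", matching the example above.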

Build and run tested with the Linux kernel, ARCH=arm.

gcc/ChangeLog:

	* config/arm/arm-protos.h: Declare KCFI helpers.
	* config/arm/arm.cc (arm_maybe_wrap_call_with_kcfi): New function.
	(arm_maybe_wrap_call_value_with_kcfi): New function.
	(arm_output_kcfi_insn): Emit KCFI assembly.
	* config/arm/arm.md: Add KCFI RTL patterns and hook expansion.
	* doc/invoke.texi: Document arm32 nuances.

Signed-off-by: Kees Cook <kees@...nel.org>
---
gcc/config/arm/arm-protos.h | 4 +
gcc/config/arm/arm.cc | 146 ++++++++++++++++++++++++++++++++++++
gcc/config/arm/arm.md | 62 +++++++++++++++
gcc/doc/invoke.texi | 17 +++++
4 files changed, 229 insertions(+)
diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index ff7e7658f912..ad3dc522e2b9 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -607,6 +607,10 @@ void arm_initialize_isa (sbitmap, const enum isa_feature *);
const char * arm_gen_far_branch (rtx *, int, const char * , const char *);
+rtx arm_maybe_wrap_call_with_kcfi (rtx, rtx);
+rtx arm_maybe_wrap_call_value_with_kcfi (rtx, rtx);
+const char *arm_output_kcfi_insn (rtx_insn *, rtx *);
+
bool arm_mve_immediate_check(rtx, machine_mode, bool);
opt_machine_mode arm_mve_data_mode (scalar_mode, poly_uint64);
diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
index 8b951f3d4a67..d06183fd2d53 100644
--- a/gcc/config/arm/arm.cc
+++ b/gcc/config/arm/arm.cc
@@ -77,6 +77,8 @@
#include "aarch-common-protos.h"
#include "machmode.h"
#include "arm-builtins.h"
+#include "kcfi.h"
+#include "flags.h"
/* This file should be included last. */
#include "target-def.h"
@@ -35803,6 +35805,150 @@ arm_mode_base_reg_class (machine_mode mode)
return MODE_BASE_REG_REG_CLASS (mode);
}
+/* Apply KCFI wrapping to call pattern if needed. */
+
+rtx
+arm_maybe_wrap_call_with_kcfi (rtx pat, rtx addr)
+{
+ /* Only indirect calls need KCFI instrumentation. */
+ bool is_direct_call = SYMBOL_REF_P (addr);
+ if (!is_direct_call)
+ {
+ rtx kcfi_type_rtx = kcfi_get_type_id_for_expanding_gimple_call ();
+ if (kcfi_type_rtx)
+ {
+ /* Extract the CALL from the PARALLEL and wrap it with KCFI. */
+ rtx call_rtx = XVECEXP (pat, 0, 0);
+ rtx kcfi_call = gen_rtx_KCFI (VOIDmode, call_rtx, kcfi_type_rtx);
+
+ /* Replace the CALL in the PARALLEL with the KCFI-wrapped call. */
+ XVECEXP (pat, 0, 0) = kcfi_call;
+ }
+ }
+ return pat;
+}
+
+/* Apply KCFI wrapping to call_value pattern if needed. */
+
+rtx
+arm_maybe_wrap_call_value_with_kcfi (rtx pat, rtx addr)
+{
+ /* Only indirect calls need KCFI instrumentation. */
+ bool is_direct_call = SYMBOL_REF_P (addr);
+ if (!is_direct_call)
+ {
+ rtx kcfi_type_rtx = kcfi_get_type_id_for_expanding_gimple_call ();
+ if (kcfi_type_rtx)
+ {
+ /* Extract the SET from the PARALLEL and wrap its CALL with KCFI. */
+ rtx set_rtx = XVECEXP (pat, 0, 0);
+ rtx call_rtx = SET_SRC (set_rtx);
+ rtx kcfi_call = gen_rtx_KCFI (VOIDmode, call_rtx, kcfi_type_rtx);
+
+ /* Replace the CALL in the SET with the KCFI-wrapped call. */
+ SET_SRC (set_rtx) = kcfi_call;
+ }
+ }
+ return pat;
+}
+
+/* Output the assembly for a KCFI checked call instruction. */
+
+const char *
+arm_output_kcfi_insn (rtx_insn *insn, rtx *operands)
+{
+ /* KCFI requires movw/movt instructions for type ID loading. */
+ if (!TARGET_HAVE_MOVT)
+ sorry ("%<-fsanitize=kcfi%> requires movw/movt instructions (ARMv7 or later)");
+
+ /* KCFI type id. */
+ uint32_t type_id = INTVAL (operands[2]);
+
+ /* Calculate typeid offset from call target. */
+ HOST_WIDE_INT offset = -(4 + kcfi_patchable_entry_prefix_nops);
+
+ /* Calculate trap immediate. */
+ unsigned addr_reg_num = REGNO (operands[0]);
+ unsigned udf_immediate = 0x8000 | (0x1F << 5) | (addr_reg_num & 31);
+
+ /* Generate labels internally. */
+ rtx trap_label = gen_label_rtx ();
+ rtx call_label = gen_label_rtx ();
+
+ /* Get label numbers for custom naming. */
+ int trap_labelno = CODE_LABEL_NUMBER (trap_label);
+ int call_labelno = CODE_LABEL_NUMBER (call_label);
+
+ /* Generate custom label names. */
+ char trap_name[32];
+ char call_name[32];
+ ASM_GENERATE_INTERNAL_LABEL (trap_name, "Lkcfi_trap", trap_labelno);
+ ASM_GENERATE_INTERNAL_LABEL (call_name, "Lkcfi_call", call_labelno);
+
+ /* Create memory operand for the type load. */
+ rtx mem_op = gen_rtx_MEM (SImode,
+ gen_rtx_PLUS (SImode, operands[0],
+ GEN_INT (offset)));
+ rtx temp_operands[6];
+
+ /* Spill r0 and r1 to stack. */
+ output_asm_insn ("push\t{r0, r1}", NULL);
+
+ /* Load actual type from memory using r0. */
+ temp_operands[0] = gen_rtx_REG (SImode, R0_REGNUM);
+ temp_operands[1] = mem_op;
+ output_asm_insn ("ldr\t%0, %1", temp_operands);
+
+ /* Load expected type low 16 bits into r1. */
+ temp_operands[0] = gen_rtx_REG (SImode, R1_REGNUM);
+ temp_operands[1] = GEN_INT (type_id & 0xFFFF);
+ output_asm_insn ("movw\t%0, %1", temp_operands);
+
+ /* Load expected type high 16 bits into r1. */
+ temp_operands[0] = gen_rtx_REG (SImode, R1_REGNUM);
+ temp_operands[1] = GEN_INT ((type_id >> 16) & 0xFFFF);
+ output_asm_insn ("movt\t%0, %1", temp_operands);
+
+ /* Compare types in r0 and r1. */
+ temp_operands[0] = gen_rtx_REG (SImode, R0_REGNUM);
+ temp_operands[1] = gen_rtx_REG (SImode, R1_REGNUM);
+ output_asm_insn ("cmp\t%0, %1", temp_operands);
+
+ /* Restore r0 and r1 from stack. */
+ output_asm_insn ("pop\t{r0, r1}", NULL);
+
+ /* Output conditional branch to call label. */
+ fputs ("\tbeq\t", asm_out_file);
+ assemble_name (asm_out_file, call_name);
+ fputc ('\n', asm_out_file);
+
+ /* Output trap label and UDF instruction. */
+ ASM_OUTPUT_LABEL (asm_out_file, trap_name);
+ temp_operands[0] = GEN_INT (udf_immediate);
+ output_asm_insn ("udf\t%0", temp_operands);
+
+ /* Output pass/call label. */
+ ASM_OUTPUT_LABEL (asm_out_file, call_name);
+
+ /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
+ if (REGNO (operands[0]) == LR_REGNUM)
+ {
+ operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
+ output_asm_insn ("mov\t%0, lr", operands);
+ }
+
+ /* Call or tail call instruction. */
+ if (SIBLING_CALL_P (insn))
+ output_asm_insn ("bx\t%0", operands);
+ else
+ output_asm_insn ("blx\t%0", operands);
+
+ return "";
+}
+
+#undef TARGET_KCFI_SUPPORTED
+#define TARGET_KCFI_SUPPORTED hook_bool_void_true
+
#undef TARGET_DOCUMENTATION_NAME
#define TARGET_DOCUMENTATION_NAME "ARM"
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 422ae549b65b..646eb0d757b1 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -8629,6 +8629,7 @@
else
{
pat = gen_call_internal (operands[0], operands[1], operands[2]);
+ pat = arm_maybe_wrap_call_with_kcfi (pat, XEXP (operands[0], 0));
arm_emit_call_insn (pat, XEXP (operands[0], 0), false);
}
@@ -8687,6 +8688,20 @@
}
)
+;; KCFI indirect call - KCFI wraps just the call pattern
+(define_insn "*kcfi_call_reg"
+ [(kcfi (call (mem:SI (match_operand:SI 0 "s_register_operand" "r"))
+ (match_operand 1 "" ""))
+ (match_operand 2 "const_int_operand"))
+ (use (match_operand 3 "" ""))
+ (clobber (reg:SI LR_REGNUM))]
+ "TARGET_32BIT && !SIBLING_CALL_P (insn) && arm_ccfsm_state == 0"
+{
+ return arm_output_kcfi_insn (insn, operands);
+}
+ [(set_attr "type" "call")
+ (set_attr "length" "36")])
+
(define_insn "*call_reg_armv5"
[(call (mem:SI (match_operand:SI 0 "s_register_operand" "r"))
(match_operand 1 "" ""))
@@ -8753,6 +8768,7 @@
{
pat = gen_call_value_internal (operands[0], operands[1],
operands[2], operands[3]);
+ pat = arm_maybe_wrap_call_value_with_kcfi (pat, XEXP (operands[1], 0));
arm_emit_call_insn (pat, XEXP (operands[1], 0), false);
}
@@ -8799,6 +8815,21 @@
}
}")
+;; KCFI indirect call_value - KCFI wraps just the call pattern
+(define_insn "*kcfi_call_value_reg"
+ [(set (match_operand 0 "" "")
+ (kcfi (call (mem:SI (match_operand:SI 1 "s_register_operand" "r"))
+ (match_operand 2 "" ""))
+ (match_operand 3 "const_int_operand")))
+ (use (match_operand 4 "" ""))
+ (clobber (reg:SI LR_REGNUM))]
+ "TARGET_32BIT && !SIBLING_CALL_P (insn) && arm_ccfsm_state == 0"
+{
+ return arm_output_kcfi_insn (insn, &operands[1]);
+}
+ [(set_attr "type" "call")
+ (set_attr "length" "36")])
+
(define_insn "*call_value_reg_armv5"
[(set (match_operand 0 "" "")
(call (mem:SI (match_operand:SI 1 "s_register_operand" "r"))
@@ -8901,6 +8932,7 @@
operands[2] = const0_rtx;
pat = gen_sibcall_internal (operands[0], operands[1], operands[2]);
+ pat = arm_maybe_wrap_call_with_kcfi (pat, XEXP (operands[0], 0));
arm_emit_call_insn (pat, operands[0], true);
DONE;
}"
@@ -8935,11 +8967,26 @@
pat = gen_sibcall_value_internal (operands[0], operands[1],
operands[2], operands[3]);
+ pat = arm_maybe_wrap_call_value_with_kcfi (pat, XEXP (operands[1], 0));
arm_emit_call_insn (pat, operands[1], true);
DONE;
}"
)
+;; KCFI sibling call - KCFI wraps just the call pattern
+(define_insn "*kcfi_sibcall_insn"
+ [(kcfi (call (mem:SI (match_operand:SI 0 "s_register_operand" "Cs"))
+ (match_operand 1 "" ""))
+ (match_operand 2 "const_int_operand"))
+ (return)
+ (use (match_operand 3 "" ""))]
+ "TARGET_32BIT && SIBLING_CALL_P (insn) && arm_ccfsm_state == 0"
+{
+ return arm_output_kcfi_insn (insn, operands);
+}
+ [(set_attr "type" "call")
+ (set_attr "length" "36")])
+
(define_insn "*sibcall_insn"
[(call (mem:SI (match_operand:SI 0 "call_insn_operand" "Cs, US"))
(match_operand 1 "" ""))
@@ -8960,6 +9007,21 @@
[(set_attr "type" "call")]
)
+;; KCFI sibling call with return value - KCFI wraps just the call pattern
+(define_insn "*kcfi_sibcall_value_insn"
+ [(set (match_operand 0 "" "")
+ (kcfi (call (mem:SI (match_operand:SI 1 "s_register_operand" "Cs"))
+ (match_operand 2 "" ""))
+ (match_operand 3 "const_int_operand")))
+ (return)
+ (use (match_operand 4 "" ""))]
+ "TARGET_32BIT && SIBLING_CALL_P (insn) && arm_ccfsm_state == 0"
+{
+ return arm_output_kcfi_insn (insn, &operands[1]);
+}
+ [(set_attr "type" "call")
+ (set_attr "length" "36")])
+
(define_insn "*sibcall_value_insn"
[(set (match_operand 0 "" "")
(call (mem:SI (match_operand:SI 1 "call_insn_operand" "Cs,US"))
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 972e8e76494f..dfaec475d2e1 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -18439,6 +18439,23 @@ trap is taken, allowing the kernel to identify both the KCFI violation
and the involved registers for detailed diagnostics (eliminating the need
for a separate @code{.kcfi_traps} section as used on x86_64).
+On ARM 32-bit, KCFI type identifiers are emitted as a @code{.word ID}
+directive (a 32-bit constant) before the function entry. ARM's
+natural 4-byte instruction alignment eliminates the need for additional
+alignment NOPs. When used with @option{-fpatchable-function-entry}, the
+type identifier is placed before any prefix NOPs. The runtime check
+preserves argument registers @code{r0} and @code{r1} using @code{push}
+and @code{pop} instructions, then uses them as scratch registers for
+the type comparison. The expected type is loaded using @code{movw} and
+@code{movt} instruction pairs for 32-bit immediate values. Type mismatches
+trigger a @code{udf} instruction with an immediate value that encodes
+both the expected type register index and the target address register
+index in the format @code{0x8000 | (type_reg << 5) | addr_reg}. This
+encoding is captured in the UDF immediate field when the trap is taken,
+allowing the kernel to identify both the KCFI violation and the involved
+registers for detailed diagnostics (eliminating the need for a separate
+@code{.kcfi_traps} section as used on x86_64).
+
KCFI is intended primarily for kernel code and may not be suitable
for user-space applications that rely on techniques incompatible
with strict type checking of indirect calls.
--
2.34.1