Message-Id: <20250821072708.3109244-4-kees@kernel.org>
Date: Thu, 21 Aug 2025 00:26:37 -0700
From: Kees Cook <kees@...nel.org>
To: Qing Zhao <qing.zhao@...cle.com>
Cc: Kees Cook <kees@...nel.org>,
gcc-patches@....gnu.org,
Joseph Myers <josmyers@...hat.com>,
Richard Biener <rguenther@...e.de>,
Jan Hubicka <hubicka@....cz>,
Richard Earnshaw <richard.earnshaw@....com>,
Richard Sandiford <richard.sandiford@....com>,
Marcus Shawcroft <marcus.shawcroft@....com>,
Kyrylo Tkachov <kyrylo.tkachov@....com>,
Kito Cheng <kito.cheng@...il.com>,
Palmer Dabbelt <palmer@...belt.com>,
Andrew Waterman <andrew@...ive.com>,
Jim Wilson <jim.wilson.gcc@...il.com>,
Peter Zijlstra <peterz@...radead.org>,
Dan Li <ashimida.1990@...il.com>,
linux-hardening@...r.kernel.org
Subject: [RFC PATCH 4/7] x86: Add x86_64 Kernel Control Flow Integrity implementation
Implement x86_64-specific KCFI backend:
- Function preamble generation with the type ID positioned at offset
  -(4 + prefix_nops) from the function entry point.
- 16-byte alignment of KCFI preambles using calculated padding NOPs,
  aligned(prefix_nops + 5, 16), so that the 5-byte type-ID movl plus any
  patchable-entry prefix NOPs leave the function entry 16-byte aligned
  (see the example layout below).
- The type-ID hash avoids embedding an ENDBR instruction encoding in type
  IDs (values matching 0xfa1e0ff3/0xfb1e0ff3 are incremented by 1 so they
  can never decode as a valid landing pad).
- On-demand scratch register strategy: r10 by default, r11 as needed (when
  the call target must be loaded from memory or already lives in r10).
  Both registers are declared as clobbers on the call so the register
  allocator sees them both early and late.
- Atomic bundled KCFI check + call/branch sequences using UNSPECV_KCFI,
  so the optimizer cannot separate a check from the call it protects.
- Uses the .kcfi_traps section for debugger/runtime metadata.
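As an illustration (the type ID value, symbol name, and NOP count below are
examples, assuming no -fpatchable-function-entry prefix NOPs, in which case
11 bytes of padding precede the 5-byte movl), the emitted preamble looks
roughly like:
	<11 bytes of NOPs>        ; pad so the entry stays 16-byte aligned
	movl $0x12345678, %eax    ; KCFI type ID; imm32 sits at entry-4..entry-1
myfunc:                           ; function entry point
	...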
Assembly code pattern required by the Linux kernel:
movl $inverse_type_id, %r10d ; Load expected type (0 - hash)
addl offset(%target), %r10d ; Add stored type ID from preamble
je .Lpass ; Branch if types match (sum == 0)
.Ltrap: ud2 ; Undefined instruction trap on mismatch
.Lpass: call/jmp *%target ; Execute validated indirect transfer
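For example (type ID value purely illustrative): for an expected type ID of
0x12345678, the caller loads -0x12345678 = 0xedcba988 into %r10d; adding the
32-bit ID stored in the callee's preamble wraps to 0 and sets ZF exactly when
that stored ID is also 0x12345678, so the je is taken on a match and execution
falls through to the ud2 trap on a mismatch.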
The initialization of the kcfi callbacks in ix86_option_override()
seems like a hack. I couldn't find a better place to do this.
Build and run tested on the x86_64 Linux kernel, including its various
CPU errata handling alternatives and FineIBT.
Signed-off-by: Kees Cook <kees@...nel.org>
---
gcc/config/i386/i386-protos.h | 4 +
gcc/config/i386/i386-options.cc | 3 +
gcc/config/i386/i386.cc | 128 ++++++++++++++++++++++++++++
gcc/config/i386/i386.md | 144 ++++++++++++++++++++++++++++++++
gcc/doc/invoke.texi | 20 +++++
5 files changed, 299 insertions(+)
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 69bc0ee570dd..a5209077506c 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -36,6 +36,10 @@ extern void ix86_maybe_emit_epilogue_vzeroupper (void);
extern void ix86_expand_epilogue (int);
extern void ix86_expand_split_stack_prologue (void);
+/* KCFI support. */
+extern void ix86_kcfi_init (void);
+extern void kcfi_emit_trap_with_section (FILE *file, rtx trap_label_rtx);
+
extern void ix86_output_addr_vec_elt (FILE *, int);
extern void ix86_output_addr_diff_elt (FILE *, int, int);
diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
index 09a35ef62980..f7726c3fdd8f 100644
--- a/gcc/config/i386/i386-options.cc
+++ b/gcc/config/i386/i386-options.cc
@@ -3180,6 +3180,9 @@ void
ix86_option_override (void)
{
ix86_option_override_internal (true, &global_options, &global_options_set);
+
+ /* Initialize KCFI target hooks for x86-64. */
+ ix86_kcfi_init ();
}
/* Remember the last target of ix86_set_current_function. */
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 65e04d3760d5..1cecd6be2f57 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -98,6 +98,7 @@ along with GCC; see the file COPYING3. If not see
#include "i386-builtins.h"
#include "i386-expand.h"
#include "i386-features.h"
+#include "kcfi.h"
#include "function-abi.h"
#include "rtl-error.h"
#include "gimple-pretty-print.h"
@@ -1700,6 +1701,19 @@ ix86_function_naked (const_tree fn)
return false;
}
+/* Apply x86-64 specific masking to KCFI type ID. */
+static uint32_t
+ix86_kcfi_mask_type_id (uint32_t type_id)
+{
+ /* Avoid embedding ENDBR instructions in KCFI type IDs.
+ ENDBR64: 0xfa1e0ff3, ENDBR32: 0xfb1e0ff3
+ If the type ID matches either instruction encoding, increment by 1. */
+ if (type_id == 0xfa1e0ff3U || type_id == 0xfb1e0ff3U)
+ return type_id + 1;
+
+ return type_id;
+}
+
/* Write the extra assembler code needed to declare a function properly. */
void
@@ -1711,6 +1725,9 @@ ix86_asm_output_function_label (FILE *out_file, const char *fname,
if (cfun)
cfun->machine->function_label_emitted = true;
+ /* Handle KCFI preamble for non-patchable functions. */
+ kcfi_emit_preamble_if_needed (out_file, decl, false, 0, fname);
+
if (is_ms_hook)
{
int i, filler_count = (TARGET_64BIT ? 32 : 16);
@@ -28456,6 +28473,117 @@ ix86_set_handled_components (sbitmap components)
}
}
+/* Generate KCFI checked call - replaces indirect call with bundled KCFI check + call. */
+static rtx
+ix86_kcfi_gen_checked_call (rtx call_insn, rtx target_reg, uint32_t type_id, HOST_WIDE_INT prefix_nops)
+{
+ rtx inverse_type_id_rtx, offset_rtx, pass_label, trap_label, call_args;
+ bool is_sibcall = false;
+
+ /* Check if this is a sibling call (tail call) */
+ if (CALL_P (call_insn))
+ is_sibcall = SIBLING_CALL_P (call_insn);
+
+ /* Convert type ID to inverse for the check (0 - hash) */
+ uint32_t inverse_type_id = (uint32_t)(0 - type_id);
+ inverse_type_id_rtx = gen_int_mode (inverse_type_id, SImode);
+
+ /* Calculate variable offset: -(4 + prefix_nops) */
+ HOST_WIDE_INT offset = -(4 + prefix_nops);
+ offset_rtx = gen_int_mode (offset, DImode);
+
+ /* Generate unique labels for this check. */
+ pass_label = gen_label_rtx ();
+ trap_label = gen_label_rtx ();
+
+ /* Extract call arguments from original call insn. */
+ rtx pattern = PATTERN (call_insn);
+ if (GET_CODE (pattern) == CALL)
+ call_args = XEXP (pattern, 1);
+ else if (GET_CODE (pattern) == SET && GET_CODE (SET_SRC (pattern)) == CALL)
+ call_args = XEXP (SET_SRC (pattern), 1);
+ else if (GET_CODE (pattern) == PARALLEL)
+ {
+ /* Handle PARALLEL patterns (includes peephole2 optimizations and other legitimate cases) */
+ is_sibcall = true; /* PARALLEL indicates a sibling call. */
+ rtx first_elem = XVECEXP (pattern, 0, 0);
+ if (GET_CODE (first_elem) == CALL)
+ {
+ call_args = XEXP (first_elem, 1);
+ }
+ else if (GET_CODE (first_elem) == SET && GET_CODE (SET_SRC (first_elem)) == CALL)
+ {
+ call_args = XEXP (SET_SRC (first_elem), 1);
+ }
+ else
+ {
+ error ("KCFI: Unexpected PARALLEL pattern structure");
+ gcc_unreachable ();
+ }
+ }
+ else
+ {
+ /* This should never happen - all indirect calls should match one of the above patterns. */
+ error ("KCFI: Unexpected call pattern structure");
+ gcc_unreachable ();
+ }
+
+ rtx bundled_call;
+ if (is_sibcall)
+ {
+ /* Use sibling call pattern for tail calls. */
+ bundled_call = gen_kcfi_checked_sibcall (target_reg, call_args, inverse_type_id_rtx, offset_rtx, pass_label, trap_label);
+ }
+ else
+ {
+ /* Use regular call pattern. */
+ bundled_call = gen_kcfi_checked_call (target_reg, call_args, inverse_type_id_rtx, offset_rtx, pass_label, trap_label);
+ }
+
+ return bundled_call;
+}
+
+/* Calculate x86_64-specific KCFI prefix NOPs for 16-byte alignment. */
+static int
+ix86_kcfi_calculate_prefix_nops (HOST_WIDE_INT prefix_nops)
+{
+ /* Calculate KCFI NOPs needed: aligned(prefix_nops + 5, 16). */
+ return (16 - ((prefix_nops + 5) % 16)) % 16;
+}
+
+/* Emit x86_64-specific type ID instruction. */
+static void
+ix86_kcfi_emit_type_id_instruction (FILE *file, uint32_t type_id)
+{
+ /* Emit movl instruction with type ID. */
+ fprintf (file, "\tmovl\t$0x%08x, %%eax\n", type_id);
+}
+
+/* Add x86-64 specific register clobbers for KCFI calls. */
+static void
+ix86_kcfi_add_clobbers (rtx_insn *call_insn)
+{
+ /* Add r10/r11 clobbers so register allocator knows they'll be used. */
+ rtx usage = CALL_INSN_FUNCTION_USAGE (call_insn);
+ clobber_reg (&usage, gen_rtx_REG (DImode, R10_REG));
+ clobber_reg (&usage, gen_rtx_REG (DImode, R11_REG));
+ CALL_INSN_FUNCTION_USAGE (call_insn) = usage;
+}
+
+/* Initialize x86-64 KCFI target hooks. */
+void
+ix86_kcfi_init (void)
+{
+ if (TARGET_64BIT && (flag_sanitize & SANITIZE_KCFI))
+ {
+ kcfi_target.mask_type_id = ix86_kcfi_mask_type_id;
+ kcfi_target.gen_kcfi_checked_call = ix86_kcfi_gen_checked_call;
+ kcfi_target.add_kcfi_clobbers = ix86_kcfi_add_clobbers;
+ kcfi_target.calculate_prefix_nops = ix86_kcfi_calculate_prefix_nops;
+ kcfi_target.emit_type_id_instruction = ix86_kcfi_emit_type_id_instruction;
+ }
+}
+
#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS ix86_get_separate_components
#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index a50475bdaf4c..acefc2246537 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -248,6 +248,7 @@
UNSPECV_RDGSBASE
UNSPECV_WRFSBASE
UNSPECV_WRGSBASE
+ UNSPECV_KCFI
UNSPECV_FXSAVE
UNSPECV_FXRSTOR
UNSPECV_FXSAVE64
@@ -30582,6 +30583,149 @@
(set_attr "type" "other")
(set_attr "mode" "<MODE>")])
+;; KCFI checked call - atomic KCFI check + indirect call bundle
+;; This prevents optimizer from separating KCFI checks from their protected calls
+(define_insn "kcfi_checked_call"
+ [(call (mem:QI (match_operand:DI 0 "nonimmediate_operand" "rm"))
+ (match_operand 1))
+ (unspec_volatile [(match_operand:SI 2 "const_int_operand" "n")
+ (match_operand:DI 3 "const_int_operand" "n")
+ (label_ref (match_operand 4))
+ (label_ref (match_operand 5))] UNSPECV_KCFI)
+ (clobber (reg:SI R10_REG))
+ (clobber (reg:SI R11_REG))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && !SIBLING_CALL_P (insn)"
+{
+ rtx target_reg;
+ bool need_r11 = false;
+
+ /* If target is not in a register, move it to r11. */
+ if (!REG_P (operands[0]))
+ {
+ target_reg = gen_rtx_REG (DImode, R11_REG);
+ /* Emit the move to r11. */
+ rtx mov_to_r11[2] = { target_reg, operands[0] };
+ output_asm_insn ("movq\t%1, %0", mov_to_r11);
+ need_r11 = true;
+ }
+ else
+ {
+ target_reg = operands[0];
+ }
+
+ /* Choose scratch register: r10 by default, r11 if r10 is the target. */
+ bool target_is_r10 = (REG_P (target_reg) && REGNO (target_reg) == R10_REG);
+ int scratch_reg = target_is_r10 ? R11_REG : R10_REG;
+ const char *scratch_name = target_is_r10 ? "r11d" : "r10d";
+
+ /* Output complete KCFI check + call sequence atomically. */
+ char mov_insn[64];
+ sprintf (mov_insn, "movl\t$%%c2, %%%%%s", scratch_name);
+ output_asm_insn (mov_insn, operands);
+
+ /* Create memory operand for the addl instruction. */
+ rtx mem_op = gen_rtx_MEM (SImode, gen_rtx_PLUS (DImode, target_reg, operands[3]));
+ rtx temp_operands[2] = { mem_op, gen_rtx_REG (SImode, scratch_reg) };
+ output_asm_insn ("addl\t%0, %1", temp_operands);
+
+ output_asm_insn ("je\t%l4", operands);
+
+ /* Output trap label and instruction. */
+ output_asm_insn ("%l5:", operands);
+ output_asm_insn ("ud2", operands);
+
+ /* Use existing function with trap and entry label RTX. */
+ kcfi_emit_trap_with_section (asm_out_file, operands[5]);
+
+ /* Output pass label. */
+ output_asm_insn ("%l4:", operands);
+
+ /* Finally emit the protected call using the register we chose. */
+ if (need_r11)
+ {
+ rtx r11_operand = gen_rtx_REG (DImode, R11_REG);
+ output_asm_insn ("call\t*%0", &r11_operand);
+ return "";
+ }
+ else
+ return "call\t*%0";
+}
+ [(set_attr "type" "call")
+ (set_attr "mode" "DI")])
+
+;; KCFI checked sibling call - atomic KCFI check + indirect sibling call bundle
+;; This handles tail call optimization cases
+(define_insn "kcfi_checked_sibcall"
+ [(call (mem:QI (match_operand:DI 0 "nonimmediate_operand" "rm"))
+ (match_operand 1))
+ (unspec_volatile [(match_operand:SI 2 "const_int_operand" "n")
+ (match_operand:DI 3 "const_int_operand" "n")
+ (label_ref (match_operand 4))
+ (label_ref (match_operand 5))] UNSPECV_KCFI)
+ (clobber (reg:SI R10_REG))
+ (clobber (reg:SI R11_REG))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && SIBLING_CALL_P (insn)"
+{
+ rtx target_reg;
+ bool need_r11 = false;
+
+ /* If target is not in a register, move it to r11. */
+ if (!REG_P (operands[0]))
+ {
+ target_reg = gen_rtx_REG (DImode, R11_REG);
+ /* Emit the move to r11. */
+ rtx mov_to_r11[2] = { target_reg, operands[0] };
+ output_asm_insn ("movq\t%1, %0", mov_to_r11);
+ need_r11 = true;
+ }
+ else
+ {
+ target_reg = operands[0];
+ }
+
+ /* Choose scratch register: r10 by default, r11 if r10 is the target. */
+ bool target_is_r10 = (REG_P (target_reg) && REGNO (target_reg) == R10_REG);
+ int scratch_reg = target_is_r10 ? R11_REG : R10_REG;
+ const char *scratch_name = target_is_r10 ? "r11d" : "r10d";
+
+ /* Output complete KCFI check + sibling call sequence atomically. */
+ char mov_insn[64];
+ sprintf (mov_insn, "movl\t$%%c2, %%%%%s", scratch_name);
+ output_asm_insn (mov_insn, operands);
+
+ /* Create memory operand for the addl instruction. */
+ rtx mem_op = gen_rtx_MEM (SImode, gen_rtx_PLUS (DImode, target_reg, operands[3]));
+ rtx temp_operands[2] = { mem_op, gen_rtx_REG (SImode, scratch_reg) };
+ output_asm_insn ("addl\t%0, %1", temp_operands);
+
+ output_asm_insn ("je\t%l4", operands);
+
+ /* Output trap label and instruction. */
+ output_asm_insn ("%l5:", operands);
+ output_asm_insn ("ud2", operands);
+
+ /* Use existing function with trap and entry label RTX. */
+ kcfi_emit_trap_with_section (asm_out_file, operands[5]);
+
+ /* Output pass label. */
+ output_asm_insn ("%l4:", operands);
+
+ /* Finally emit the protected sibling call (jmp) using the register we chose. */
+ if (need_r11)
+ {
+ rtx r11_operand = gen_rtx_REG (DImode, R11_REG);
+ output_asm_insn ("jmp\t*%0", &r11_operand);
+ return "";
+ }
+ else
+ return "jmp\t*%0";
+}
+ [(set_attr "type" "call")
+ (set_attr "mode" "DI")])
+
+
(include "mmx.md")
(include "sse.md")
(include "sync.md")
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index c66f47336826..f531a9f6ce33 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -18316,6 +18316,26 @@ and without changing the entry points of the target functions. Only
functions that are referenced by their address receive the KCFI preamble
instrumentation.
+Platform-specific implementation details:
+
+On x86_64, KCFI type identifiers are emitted as a @code{movl $ID, %eax}
+instruction before the function entry. The implementation ensures that
+type IDs never collide with ENDBR instruction encodings. When used with
+@option{-fpatchable-function-entry}, the type identifier is placed before
+any patchable NOPs, with appropriate alignment to maintain a 16-byte
+boundary for the function entry. The runtime check loads the type ID
+from the target function into @code{%r10d} and uses an @code{addl}
+instruction to add the negative expected type ID, effectively zeroing
+the register if the types match. A conditional jump follows to either
+continue execution or trap on mismatch. The check sequence uses
+@code{%r10d} and @code{%r11d} as scratch registers. Trap locations are
+recorded in a special @code{.kcfi_traps} section that maps trap sites
+to their corresponding function entry points, enabling debuggers and
+crash handlers to identify KCFI violations. The exact instruction
+sequences for both the KCFI preamble and the check-call bundle are
+considered ABI, as the Linux kernel may optionally rewrite these areas
+at boot time to mitigate detected CPU errata.
+
KCFI is intended primarily for kernel code and may not be suitable
for user-space applications that rely on techniques incompatible
with strict type checking of indirect calls.
--
2.34.1