Message-ID: <f44dfc2337c4c63208c2ca570046ad21@matoro.tk>
Date: Sat, 10 Jan 2026 18:50:55 -0500
From: matoro <matoro_mailinglist_kernel@...oro.tk>
To: Magnus Lindholm <linmag7@...il.com>
Cc: linux-kernel@...r.kernel.org, linux-alpha@...r.kernel.org,
hch@...radead.org, macro@...am.me.uk, glaubitz@...sik.fu-berlin.de,
mattst88@...il.com, richard.henderson@...aro.org, ink@...een.parts
Subject: Re: [PATCH 1/1] alpha: fix user-space corruption during memory
compaction
On 2026-01-02 12:30, Magnus Lindholm wrote:
> Alpha systems can suffer sporadic user-space crashes and heap
> corruption when memory compaction is enabled.
>
> Symptoms include SIGSEGV, glibc allocator failures (e.g. "unaligned
> tcache chunk"), and compiler internal errors. The failures disappear
> when compaction is disabled or when using global TLB invalidation.
>
> The root cause is insufficient TLB shootdown during page migration.
> Alpha relies on ASN-based MM context rollover for instruction cache
> coherency, but this alone is not sufficient to prevent stale data or
> instruction translations from surviving migration.
>
> Fix this by introducing a migration-specific helper that combines:
> - MM context invalidation (ASN rollover),
> - immediate per-CPU TLB invalidation (TBI),
> - synchronous cross-CPU shootdown when required.
>
> The helper is used only by migration/compaction paths to avoid changing
> global TLB semantics.
>
> Additionally, update flush_tlb_other() and pte_clear() to use
> READ_ONCE()/WRITE_ONCE() for correct SMP memory ordering.
>
> This fixes observed crashes on both UP and SMP Alpha systems.
>
> Signed-off-by: Magnus Lindholm <linmag7@...il.com>
> ---
> arch/alpha/include/asm/pgtable.h | 33 ++++++++-
> arch/alpha/include/asm/tlbflush.h | 4 +-
> arch/alpha/mm/Makefile | 2 +-
> arch/alpha/mm/tlbflush.c | 112 ++++++++++++++++++++++++++++++
> 4 files changed, 148 insertions(+), 3 deletions(-)
> create mode 100644 arch/alpha/mm/tlbflush.c
>
> diff --git a/arch/alpha/include/asm/pgtable.h b/arch/alpha/include/asm/pgtable.h
> index 90e7a9539102..c9508ec37efc 100644
> --- a/arch/alpha/include/asm/pgtable.h
> +++ b/arch/alpha/include/asm/pgtable.h
> @@ -17,6 +17,7 @@
> #include <asm/processor.h> /* For TASK_SIZE */
> #include <asm/machvec.h>
> #include <asm/setup.h>
> +#include <linux/page_table_check.h>
>
> struct mm_struct;
> struct vm_area_struct;
> @@ -183,6 +184,9 @@ extern inline void pud_set(pud_t * pudp, pmd_t * pmdp)
> { pud_val(*pudp) = _PAGE_TABLE | ((((unsigned long) pmdp) - PAGE_OFFSET) << (32-PAGE_SHIFT)); }
>
>
> +extern void migrate_flush_tlb_page(struct vm_area_struct *vma,
> + unsigned long addr);
> +
> extern inline unsigned long
> pmd_page_vaddr(pmd_t pmd)
> {
> @@ -202,7 +206,7 @@ extern inline int pte_none(pte_t pte) { return !pte_val(pte); }
> extern inline int pte_present(pte_t pte) { return pte_val(pte) & _PAGE_VALID; }
> extern inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
> {
> - pte_val(*ptep) = 0;
> + WRITE_ONCE(pte_val(*ptep), 0);
> }
>
> extern inline int pmd_none(pmd_t pmd) { return !pmd_val(pmd); }
> @@ -264,6 +268,33 @@ extern inline pte_t * pte_offset_kernel(pmd_t * dir, unsigned long address)
>
> extern pgd_t swapper_pg_dir[1024];
>
> +#ifdef CONFIG_COMPACTION
> +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
> +
> +static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
> + unsigned long address,
> + pte_t *ptep)
> +{
> + pte_t pte = READ_ONCE(*ptep);
> +
> + pte_clear(mm, address, ptep);
> + return pte;
> +}
> +
> +#define __HAVE_ARCH_PTEP_CLEAR_FLUSH
> +
> +static inline pte_t ptep_clear_flush(struct vm_area_struct *vma,
> + unsigned long addr, pte_t *ptep)
> +{
> + struct mm_struct *mm = vma->vm_mm;
> + pte_t pte = ptep_get_and_clear(mm, addr, ptep);
> +
> + page_table_check_pte_clear(mm, pte);
> + migrate_flush_tlb_page(vma, addr);
> + return pte;
> +}
> +
> +#endif
> /*
> * The Alpha doesn't have any external MMU info: the kernel page
> * tables contain all the necessary information.
> diff --git a/arch/alpha/include/asm/tlbflush.h b/arch/alpha/include/asm/tlbflush.h
> index ba4b359d6c39..0c8529997f54 100644
> --- a/arch/alpha/include/asm/tlbflush.h
> +++ b/arch/alpha/include/asm/tlbflush.h
> @@ -58,7 +58,9 @@ flush_tlb_other(struct mm_struct *mm)
> unsigned long *mmc = &mm->context[smp_processor_id()];
> /* Check it's not zero first to avoid cacheline ping pong
> when possible. */
> - if (*mmc) *mmc = 0;
> +
> + if (READ_ONCE(*mmc))
> + WRITE_ONCE(*mmc, 0);
> }
>
> #ifndef CONFIG_SMP
> diff --git a/arch/alpha/mm/Makefile b/arch/alpha/mm/Makefile
> index 101dbd06b4ce..2d05664058f6 100644
> --- a/arch/alpha/mm/Makefile
> +++ b/arch/alpha/mm/Makefile
> @@ -3,4 +3,4 @@
> # Makefile for the linux alpha-specific parts of the memory manager.
> #
>
> -obj-y := init.o fault.o
> +obj-y := init.o fault.o tlbflush.o
> diff --git a/arch/alpha/mm/tlbflush.c b/arch/alpha/mm/tlbflush.c
> new file mode 100644
> index 000000000000..ccbc317b9a34
> --- /dev/null
> +++ b/arch/alpha/mm/tlbflush.c
> @@ -0,0 +1,112 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Alpha TLB shootdown helpers
> + *
> + * Copyright (C) 2025 Magnus Lindholm <linmag7@...il.com>
> + *
> + * Alpha-specific TLB flush helpers that cannot be expressed purely
> + * as inline functions.
> + *
> + * These helpers provide combined MM context handling (ASN rollover)
> + * and immediate TLB invalidation for page migration and memory
> + * compaction paths, where lazy shootdowns are insufficient.
> + */
> +
> +#include <linux/mm.h>
> +#include <linux/smp.h>
> +#include <linux/sched.h>
> +#include <asm/tlbflush.h>
> +#include <asm/pal.h>
> +#include <asm/mmu_context.h>
> +
> +#define asn_locked() (cpu_data[smp_processor_id()].asn_lock)
> +
> +/*
> + * Migration/compaction helper: combine mm context (ASN) handling with an
> + * immediate per-page TLB invalidate and (for exec) an instruction barrier.
> + *
> + * This mirrors the SMP combined IPI handler semantics, but runs locally on UP.
> + */
> +#ifndef CONFIG_SMP
> +void migrate_flush_tlb_page(struct vm_area_struct *vma,
> + unsigned long addr)
> +{
> + struct mm_struct *mm = vma->vm_mm;
> + int tbi_type = (vma->vm_flags & VM_EXEC) ? 3 : 2;
> +
> + /*
> + * First do the mm-context side:
> + * If we're currently running this mm, reload a fresh context ASN.
> + * Otherwise, mark context invalid.
> + *
> + * On UP, this is mostly about matching the SMP semantics and ensuring
> + * exec/i-cache tagging assumptions hold when compaction migrates pages.
> + */
> + if (mm == current->active_mm)
> + flush_tlb_current(mm);
> + else
> + flush_tlb_other(mm);
> +
> + /*
> + * Then do the immediate translation kill for this VA.
> + * For exec mappings, order instruction fetch after invalidation.
> + */
> + tbi(tbi_type, addr);
> +}
> +
> +#else
> +struct tlb_mm_and_addr {
> + struct mm_struct *mm;
> + unsigned long addr;
> + int tbi_type; /* 2 = DTB, 3 = ITB+DTB */
> +};
> +
> +static void ipi_flush_mm_and_page(void *x)
> +{
> + struct tlb_mm_and_addr *d = x;
> +
> + /* Part 1: mm context side (Alpha uses ASN/context as a key mechanism). */
> + if (d->mm == current->active_mm && !asn_locked())
> + __load_new_mm_context(d->mm);
> + else
> + flush_tlb_other(d->mm);
> +
> + /* Part 2: immediate per-VA invalidation on this CPU. */
> + tbi(d->tbi_type, d->addr);
> +}
> +
> +void migrate_flush_tlb_page(struct vm_area_struct *vma, unsigned long addr)
> +{
> + struct mm_struct *mm = vma->vm_mm;
> + struct tlb_mm_and_addr d = {
> + .mm = mm,
> + .addr = addr,
> + .tbi_type = (vma->vm_flags & VM_EXEC) ? 3 : 2,
> + };
> +
> + /*
> + * One synchronous rendezvous: every CPU runs ipi_flush_mm_and_page().
> + * This is the "combined" version of flush_tlb_mm + per-page invalidate.
> + */
> + preempt_disable();
> + on_each_cpu(ipi_flush_mm_and_page, &d, 1);
> +
> + /*
> + * mimic flush_tlb_mm()'s mm_users<=1 optimization.
> + */
> + if (atomic_read(&mm->mm_users) <= 1) {
> +
> + int cpu, this_cpu;
> + this_cpu = smp_processor_id();
> +
> + for (cpu = 0; cpu < NR_CPUS; cpu++) {
> + if (!cpu_online(cpu) || cpu == this_cpu)
> + continue;
> + if (READ_ONCE(mm->context[cpu]))
> + WRITE_ONCE(mm->context[cpu], 0);
> + }
> + }
> + preempt_enable();
> +}
> +
> +#endif
Tested-by: Matoro Mahri <matoro_mailinglist_kernel@...oro.tk>
I tested this on a DS15 running a non-SMP kernel and confirmed it resolved the problem.