Message-ID: <f44dfc2337c4c63208c2ca570046ad21@matoro.tk>
Date: Sat, 10 Jan 2026 18:50:55 -0500
From: matoro <matoro_mailinglist_kernel@...oro.tk>
To: Magnus Lindholm <linmag7@...il.com>
Cc: linux-kernel@...r.kernel.org, linux-alpha@...r.kernel.org,
hch@...radead.org, macro@...am.me.uk, glaubitz@...sik.fu-berlin.de,
mattst88@...il.com, richard.henderson@...aro.org, ink@...een.parts
Subject: Re: [PATCH 1/1] alpha: fix user-space corruption during memory
compaction
On 2026-01-02 12:30, Magnus Lindholm wrote:
> Alpha systems can suffer sporadic user-space crashes and heap
> corruption when memory compaction is enabled.
>
> Symptoms include SIGSEGV, glibc allocator failures (e.g. "unaligned
> tcache chunk"), and compiler internal errors. The failures disappear
> when compaction is disabled or when using global TLB invalidation.
>
> The root cause is insufficient TLB shootdown during page migration.
> Alpha relies on ASN-based MM context rollover for instruction cache
> coherency, but this alone is not sufficient to prevent stale data or
> instruction translations from surviving migration.
>
> Fix this by introducing a migration-specific helper that combines:
> - MM context invalidation (ASN rollover),
> - immediate per-CPU TLB invalidation (TBI),
> - synchronous cross-CPU shootdown when required.
>
> The helper is used only by migration/compaction paths to avoid changing
> global TLB semantics.
>
> Additionally, update flush_tlb_other() and pte_clear() to use
> READ_ONCE()/WRITE_ONCE() for correct SMP memory ordering.
>
> This fixes observed crashes on both UP and SMP Alpha systems.
>
> Signed-off-by: Magnus Lindholm <linmag7@...il.com>
> ---
> arch/alpha/include/asm/pgtable.h | 33 ++++++++-
> arch/alpha/include/asm/tlbflush.h | 4 +-
> arch/alpha/mm/Makefile | 2 +-
> arch/alpha/mm/tlbflush.c | 112 ++++++++++++++++++++++++++++++
> 4 files changed, 148 insertions(+), 3 deletions(-)
> create mode 100644 arch/alpha/mm/tlbflush.c
>
> diff --git a/arch/alpha/include/asm/pgtable.h b/arch/alpha/include/asm/pgtable.h
> index 90e7a9539102..c9508ec37efc 100644
> --- a/arch/alpha/include/asm/pgtable.h
> +++ b/arch/alpha/include/asm/pgtable.h
> @@ -17,6 +17,7 @@
> #include <asm/processor.h> /* For TASK_SIZE */
> #include <asm/machvec.h>
> #include <asm/setup.h>
> +#include <linux/page_table_check.h>
>
> struct mm_struct;
> struct vm_area_struct;
> @@ -183,6 +184,9 @@ extern inline void pud_set(pud_t * pudp, pmd_t * pmdp)
> { pud_val(*pudp) = _PAGE_TABLE | ((((unsigned long) pmdp) - PAGE_OFFSET) << (32-PAGE_SHIFT)); }
>
>
> +extern void migrate_flush_tlb_page(struct vm_area_struct *vma,
> + unsigned long addr);
> +
> extern inline unsigned long
> pmd_page_vaddr(pmd_t pmd)
> {
> @@ -202,7 +206,7 @@ extern inline int pte_none(pte_t pte) { return !pte_val(pte); }
> extern inline int pte_present(pte_t pte) { return pte_val(pte) & _PAGE_VALID; }
> extern inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
> {
> - pte_val(*ptep) = 0;
> + WRITE_ONCE(pte_val(*ptep), 0);
> }
>
> extern inline int pmd_none(pmd_t pmd) { return !pmd_val(pmd); }
> @@ -264,6 +268,33 @@ extern inline pte_t * pte_offset_kernel(pmd_t * dir, unsigned long address)
>
> extern pgd_t swapper_pg_dir[1024];
>
> +#ifdef CONFIG_COMPACTION
> +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
> +
> +static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
> + unsigned long address,
> + pte_t *ptep)
> +{
> + pte_t pte = READ_ONCE(*ptep);
> +
> + pte_clear(mm, address, ptep);
> + return pte;
> +}
> +
> +#define __HAVE_ARCH_PTEP_CLEAR_FLUSH
> +
> +static inline pte_t ptep_clear_flush(struct vm_area_struct *vma,
> + unsigned long addr, pte_t *ptep)
> +{
> + struct mm_struct *mm = vma->vm_mm;
> + pte_t pte = ptep_get_and_clear(mm, addr, ptep);
> +
> + page_table_check_pte_clear(mm, pte);
> + migrate_flush_tlb_page(vma, addr);
> + return pte;
> +}
> +
> +#endif
> /*
> * The Alpha doesn't have any external MMU info: the kernel page
> * tables contain all the necessary information.
> diff --git a/arch/alpha/include/asm/tlbflush.h b/arch/alpha/include/asm/tlbflush.h
> index ba4b359d6c39..0c8529997f54 100644
> --- a/arch/alpha/include/asm/tlbflush.h
> +++ b/arch/alpha/include/asm/tlbflush.h
> @@ -58,7 +58,9 @@ flush_tlb_other(struct mm_struct *mm)
> unsigned long *mmc = &mm->context[smp_processor_id()];
> /* Check it's not zero first to avoid cacheline ping pong
> when possible. */
> - if (*mmc) *mmc = 0;
> +
> + if (READ_ONCE(*mmc))
> + WRITE_ONCE(*mmc, 0);
> }
>
> #ifndef CONFIG_SMP
> diff --git a/arch/alpha/mm/Makefile b/arch/alpha/mm/Makefile
> index 101dbd06b4ce..2d05664058f6 100644
> --- a/arch/alpha/mm/Makefile
> +++ b/arch/alpha/mm/Makefile
> @@ -3,4 +3,4 @@
> # Makefile for the linux alpha-specific parts of the memory manager.
> #
>
> -obj-y := init.o fault.o
> +obj-y := init.o fault.o tlbflush.o
> diff --git a/arch/alpha/mm/tlbflush.c b/arch/alpha/mm/tlbflush.c
> new file mode 100644
> index 000000000000..ccbc317b9a34
> --- /dev/null
> +++ b/arch/alpha/mm/tlbflush.c
> @@ -0,0 +1,112 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Alpha TLB shootdown helpers
> + *
> + * Copyright (C) 2025 Magnus Lindholm <linmag7@...il.com>
> + *
> + * Alpha-specific TLB flush helpers that cannot be expressed purely
> + * as inline functions.
> + *
> + * These helpers provide combined MM context handling (ASN rollover)
> + * and immediate TLB invalidation for page migration and memory
> + * compaction paths, where lazy shootdowns are insufficient.
> + */
> +
> +#include <linux/mm.h>
> +#include <linux/smp.h>
> +#include <linux/sched.h>
> +#include <asm/tlbflush.h>
> +#include <asm/pal.h>
> +#include <asm/mmu_context.h>
> +
> +#define asn_locked() (cpu_data[smp_processor_id()].asn_lock)
> +
> +/*
> + * Migration/compaction helper: combine mm context (ASN) handling with an
> + * immediate per-page TLB invalidate and (for exec) an instruction barrier.
> + *
> + * This mirrors the SMP combined IPI handler semantics, but runs locally on UP.
> + */
> +#ifndef CONFIG_SMP
> +void migrate_flush_tlb_page(struct vm_area_struct *vma,
> + unsigned long addr)
> +{
> + struct mm_struct *mm = vma->vm_mm;
> + int tbi_type = (vma->vm_flags & VM_EXEC) ? 3 : 2;
> +
> + /*
> + * First do the mm-context side:
> + * If we're currently running this mm, reload a fresh context ASN.
> + * Otherwise, mark context invalid.
> + *
> + * On UP, this is mostly about matching the SMP semantics and ensuring
> + * exec/i-cache tagging assumptions hold when compaction migrates pages.
> + */
> + if (mm == current->active_mm)
> + flush_tlb_current(mm);
> + else
> + flush_tlb_other(mm);
> +
> + /*
> + * Then do the immediate translation kill for this VA.
> + * For exec mappings, order instruction fetch after invalidation.
> + */
> + tbi(tbi_type, addr);
> +}
> +
> +#else
> +struct tlb_mm_and_addr {
> + struct mm_struct *mm;
> + unsigned long addr;
> + int tbi_type; /* 2 = DTB, 3 = ITB+DTB */
> +};
> +
> +static void ipi_flush_mm_and_page(void *x)
> +{
> + struct tlb_mm_and_addr *d = x;
> +
> + /* Part 1: mm context side (Alpha uses ASN/context as a key mechanism). */
> + if (d->mm == current->active_mm && !asn_locked())
> + __load_new_mm_context(d->mm);
> + else
> + flush_tlb_other(d->mm);
> +
> + /* Part 2: immediate per-VA invalidation on this CPU. */
> + tbi(d->tbi_type, d->addr);
> +}
> +
> +void migrate_flush_tlb_page(struct vm_area_struct *vma, unsigned long addr)
> +{
> + struct mm_struct *mm = vma->vm_mm;
> + struct tlb_mm_and_addr d = {
> + .mm = mm,
> + .addr = addr,
> + .tbi_type = (vma->vm_flags & VM_EXEC) ? 3 : 2,
> + };
> +
> + /*
> + * One synchronous rendezvous: every CPU runs ipi_flush_mm_and_page().
> + * This is the "combined" version of flush_tlb_mm + per-page invalidate.
> + */
> + preempt_disable();
> + on_each_cpu(ipi_flush_mm_and_page, &d, 1);
> +
> + /*
> + * mimic flush_tlb_mm()'s mm_users<=1 optimization.
> + */
> + if (atomic_read(&mm->mm_users) <= 1) {
> +
> + int cpu, this_cpu;
> + this_cpu = smp_processor_id();
> +
> + for (cpu = 0; cpu < NR_CPUS; cpu++) {
> + if (!cpu_online(cpu) || cpu == this_cpu)
> + continue;
> + if (READ_ONCE(mm->context[cpu]))
> + WRITE_ONCE(mm->context[cpu], 0);
> + }
> + }
> + preempt_enable();
> +}
> +
> +#endif
Tested-by: Matoro Mahri <matoro_mailinglist_kernel@...oro.tk>
I tested this on a DS15 running a non-SMP kernel and confirmed it resolved the problem.