Message-ID: <2899b993-af9c-41df-b36f-c6a5235089e2@amd.com>
Date: Sat, 5 Jul 2025 14:36:04 +0530
From: Vasant Hegde <vasant.hegde@....com>
To: Lu Baolu <baolu.lu@...ux.intel.com>, Joerg Roedel <joro@...tes.org>,
Will Deacon <will@...nel.org>, Robin Murphy <robin.murphy@....com>,
Kevin Tian <kevin.tian@...el.com>, Jason Gunthorpe <jgg@...dia.com>,
Jann Horn <jannh@...gle.com>, Dave Hansen <dave.hansen@...el.com>,
Alistair Popple <apopple@...dia.com>, Peter Zijlstra <peterz@...radead.org>,
Uladzislau Rezki <urezki@...il.com>,
Jean-Philippe Brucker <jean-philippe@...aro.org>,
Andy Lutomirski <luto@...nel.org>
Cc: iommu@...ts.linux.dev, security@...nel.org, linux-kernel@...r.kernel.org,
stable@...r.kernel.org
Subject: Re: [PATCH 1/1] iommu/sva: Invalidate KVA range on kernel TLB flush
On 7/4/2025 7:00 PM, Lu Baolu wrote:
> The vmalloc() and vfree() functions manage virtually contiguous, but not
> necessarily physically contiguous, kernel memory regions. When vfree()
> unmaps such a region, it tears down the associated kernel page table
> entries and frees the physical pages.
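
(For context, not part of the patch: a minimal sketch of the lifecycle
described above, using the standard vmalloc()/vfree() API.)

	#include <linux/vmalloc.h>

	static void vmalloc_lifecycle_demo(void)
	{
		void *buf = vmalloc(4 * PAGE_SIZE); /* virtually contiguous */

		if (!buf)
			return;
		/* ... use buf; backing pages may not be physically contiguous ... */
		vfree(buf);	/* unmaps the range, frees the pages, and
				 * tears down the kernel PTEs for it */
	}
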
>
> In the IOMMU Shared Virtual Addressing (SVA) context, the IOMMU hardware
> shares and walks the CPU's page tables. Architectures like x86 share
> static kernel address mappings across all user page tables, allowing the
> IOMMU to access the kernel portion of these tables.
>
> Modern IOMMUs often cache page table entries to optimize walk performance,
> even for intermediate page table levels. If kernel page table mappings are
> changed (e.g., by vfree()), but the IOMMU's internal caches retain stale
> entries, a use-after-free (UAF) condition arises. If these
> freed page table pages are reallocated for a different purpose, potentially
> by an attacker, the IOMMU could misinterpret the new data as valid page
> table entries. This allows the IOMMU to walk into attacker-controlled
> memory, leading to arbitrary physical memory DMA access or privilege
> escalation.
>
> To mitigate this, introduce a new iommu interface to flush IOMMU caches
> and fence pending page table walks when kernel page mappings are updated.
> This interface should be invoked from architecture-specific code that
> manages combined user and kernel page tables.
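
(As the x86 hunk below illustrates, the hook is a single call at the tail
of the kernel-range TLB flush. A hedged sketch of what an architecture
wiring looks like; arch_flush_kernel_tlb() is a hypothetical stand-in for
the arch's own CPU TLB invalidation:)

	void flush_tlb_kernel_range(unsigned long start, unsigned long end)
	{
		/* invalidate the CPU TLBs for the kernel range first ... */
		arch_flush_kernel_tlb(start, end);	/* hypothetical */

		/* ... then flush IOMMU caches and fence in-flight walks */
		iommu_sva_invalidate_kva_range(start, end);
	}
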
>
> Fixes: 26b25a2b98e4 ("iommu: Bind process address spaces to devices")
> Cc: stable@...r.kernel.org
> Co-developed-by: Jason Gunthorpe <jgg@...dia.com>
> Signed-off-by: Jason Gunthorpe <jgg@...dia.com>
> Signed-off-by: Lu Baolu <baolu.lu@...ux.intel.com>
Thanks for the patch. Looks good to me.

Reviewed-by: Vasant Hegde <vasant.hegde@....com>

-Vasant
> ---
> arch/x86/mm/tlb.c | 2 ++
> drivers/iommu/iommu-sva.c | 32 +++++++++++++++++++++++++++++++-
> include/linux/iommu.h | 4 ++++
> 3 files changed, 37 insertions(+), 1 deletion(-)
>
> diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
> index 39f80111e6f1..a41499dfdc3f 100644
> --- a/arch/x86/mm/tlb.c
> +++ b/arch/x86/mm/tlb.c
> @@ -12,6 +12,7 @@
> #include <linux/task_work.h>
> #include <linux/mmu_notifier.h>
> #include <linux/mmu_context.h>
> +#include <linux/iommu.h>
>
> #include <asm/tlbflush.h>
> #include <asm/mmu_context.h>
> @@ -1540,6 +1541,7 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end)
> kernel_tlb_flush_range(info);
>
> put_flush_tlb_info();
> + iommu_sva_invalidate_kva_range(start, end);
> }
>
> /*
> diff --git a/drivers/iommu/iommu-sva.c b/drivers/iommu/iommu-sva.c
> index 1a51cfd82808..154384eab8a3 100644
> --- a/drivers/iommu/iommu-sva.c
> +++ b/drivers/iommu/iommu-sva.c
> @@ -10,6 +10,8 @@
> #include "iommu-priv.h"
>
> static DEFINE_MUTEX(iommu_sva_lock);
> +static DEFINE_STATIC_KEY_FALSE(iommu_sva_present);
> +static LIST_HEAD(iommu_sva_mms);
> static struct iommu_domain *iommu_sva_domain_alloc(struct device *dev,
> struct mm_struct *mm);
>
> @@ -42,6 +44,7 @@ static struct iommu_mm_data *iommu_alloc_mm_data(struct mm_struct *mm, struct de
> return ERR_PTR(-ENOSPC);
> }
> iommu_mm->pasid = pasid;
> + iommu_mm->mm = mm;
> INIT_LIST_HEAD(&iommu_mm->sva_domains);
> /*
> * Make sure the write to mm->iommu_mm is not reordered in front of
> @@ -132,8 +135,13 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm
> if (ret)
> goto out_free_domain;
> domain->users = 1;
> - list_add(&domain->next, &mm->iommu_mm->sva_domains);
>
> + if (list_empty(&iommu_mm->sva_domains)) {
> + if (list_empty(&iommu_sva_mms))
> + static_branch_enable(&iommu_sva_present);
> + list_add(&iommu_mm->mm_list_elm, &iommu_sva_mms);
> + }
> + list_add(&domain->next, &iommu_mm->sva_domains);
> out:
> refcount_set(&handle->users, 1);
> mutex_unlock(&iommu_sva_lock);
> @@ -175,6 +183,13 @@ void iommu_sva_unbind_device(struct iommu_sva *handle)
> list_del(&domain->next);
> iommu_domain_free(domain);
> }
> +
> + if (list_empty(&iommu_mm->sva_domains)) {
> + list_del(&iommu_mm->mm_list_elm);
> + if (list_empty(&iommu_sva_mms))
> + static_branch_disable(&iommu_sva_present);
> + }
> +
> mutex_unlock(&iommu_sva_lock);
> kfree(handle);
> }
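
The bookkeeping here is symmetric with bind: the mm goes onto
iommu_sva_mms on its first domain and comes off on its last. For
reference, the driver-facing API being tracked (a minimal usage sketch,
assuming <linux/iommu.h>, <linux/err.h> and <linux/sched.h>; error
handling trimmed):

	static int sva_demo(struct device *dev)
	{
		struct iommu_sva *handle;
		u32 pasid;

		handle = iommu_sva_bind_device(dev, current->mm);
		if (IS_ERR(handle))
			return PTR_ERR(handle);

		pasid = iommu_sva_get_pasid(handle);
		/* program the device to tag its DMA with this PASID ... */

		iommu_sva_unbind_device(handle);
		return 0;
	}
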
> @@ -312,3 +327,18 @@ static struct iommu_domain *iommu_sva_domain_alloc(struct device *dev,
>
> return domain;
> }
> +
> +void iommu_sva_invalidate_kva_range(unsigned long start, unsigned long end)
> +{
> + struct iommu_mm_data *iommu_mm;
> +
> + might_sleep();
> +
> + if (!static_branch_unlikely(&iommu_sva_present))
> + return;
> +
> + guard(mutex)(&iommu_sva_lock);
> + list_for_each_entry(iommu_mm, &iommu_sva_mms, mm_list_elm)
> + mmu_notifier_arch_invalidate_secondary_tlbs(iommu_mm->mm, start, end);
> +}
> +EXPORT_SYMBOL_GPL(iommu_sva_invalidate_kva_range);
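
One note on the structure for readers less familiar with it: the static
branch keeps flush_tlb_kernel_range() essentially free when no SVA domain
exists, which is the common case. A self-contained sketch of the same
enable-on-first-user pattern (hypothetical names, not part of this patch):

	#include <linux/jump_label.h>
	#include <linux/list.h>
	#include <linux/mutex.h>

	static DEFINE_STATIC_KEY_FALSE(users_present);	/* hypothetical */
	static LIST_HEAD(user_list);
	static DEFINE_MUTEX(user_lock);

	void add_user(struct list_head *node)
	{
		mutex_lock(&user_lock);
		if (list_empty(&user_list))
			static_branch_enable(&users_present);
		list_add(node, &user_list);
		mutex_unlock(&user_lock);
	}

	void hot_path(void)
	{
		/* compiles to a patched-out nop while the key is disabled */
		if (!static_branch_unlikely(&users_present))
			return;
		/* slow path: walk user_list under user_lock ... */
	}
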
> diff --git a/include/linux/iommu.h b/include/linux/iommu.h
> index 156732807994..31330c12b8ee 100644
> --- a/include/linux/iommu.h
> +++ b/include/linux/iommu.h
> @@ -1090,7 +1090,9 @@ struct iommu_sva {
>
> struct iommu_mm_data {
> u32 pasid;
> + struct mm_struct *mm;
> struct list_head sva_domains;
> + struct list_head mm_list_elm;
> };
>
> int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode);
> @@ -1571,6 +1573,7 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev,
> struct mm_struct *mm);
> void iommu_sva_unbind_device(struct iommu_sva *handle);
> u32 iommu_sva_get_pasid(struct iommu_sva *handle);
> +void iommu_sva_invalidate_kva_range(unsigned long start, unsigned long end);
> #else
> static inline struct iommu_sva *
> iommu_sva_bind_device(struct device *dev, struct mm_struct *mm)
> @@ -1595,6 +1598,7 @@ static inline u32 mm_get_enqcmd_pasid(struct mm_struct *mm)
> }
>
> static inline void mm_pasid_drop(struct mm_struct *mm) {}
> +static inline void iommu_sva_invalidate_kva_range(unsigned long start, unsigned long end) {}
> #endif /* CONFIG_IOMMU_SVA */
>
> #ifdef CONFIG_IOMMU_IOPF