[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <D57Q652K9SCW.2LESCVR7ZK8BK@kernel.org>
Date: Mon, 28 Oct 2024 22:34:51 +0200
From: "Jarkko Sakkinen" <jarkko@...nel.org>
To: "Lorenzo Stoakes" <lorenzo.stoakes@...cle.com>, "Andrew Morton"
<akpm@...ux-foundation.org>
Cc: "Suren Baghdasaryan" <surenb@...gle.com>, "Liam R . Howlett"
<Liam.Howlett@...cle.com>, "Matthew Wilcox" <willy@...radead.org>,
"Vlastimil Babka" <vbabka@...e.cz>, "Paul E . McKenney"
<paulmck@...nel.org>, "Jann Horn" <jannh@...gle.com>, "David Hildenbrand"
<david@...hat.com>, <linux-mm@...ck.org>, <linux-kernel@...r.kernel.org>,
"Muchun Song" <muchun.song@...ux.dev>, "Richard Henderson"
<richard.henderson@...aro.org>, "Matt Turner" <mattst88@...il.com>, "Thomas
Bogendoerfer" <tsbogend@...ha.franken.de>, "James E . J . Bottomley"
<James.Bottomley@...senPartnership.com>, "Helge Deller" <deller@....de>,
"Chris Zankel" <chris@...kel.net>, "Max Filippov" <jcmvbkbc@...il.com>,
"Arnd Bergmann" <arnd@...nel.org>, <linux-alpha@...r.kernel.org>,
<linux-mips@...r.kernel.org>, <linux-parisc@...r.kernel.org>,
<linux-arch@...r.kernel.org>, "Shuah Khan" <shuah@...nel.org>, "Christian
Brauner" <brauner@...nel.org>, <linux-kselftest@...r.kernel.org>,
"Sidhartha Kumar" <sidhartha.kumar@...cle.com>, "Jeff Xu"
<jeffxu@...omium.org>, "Christoph Hellwig" <hch@...radead.org>,
<linux-api@...r.kernel.org>, "John Hubbard" <jhubbard@...dia.com>
Subject: Re: [PATCH v3 2/5] mm: add PTE_MARKER_GUARD PTE marker
On Wed Oct 23, 2024 at 7:24 PM EEST, Lorenzo Stoakes wrote:
> Add a new PTE marker that results in any access causing the accessing
> process to segfault.
>
> This is preferable to PTE_MARKER_POISONED, which results in the same
> handling as hardware poisoned memory, and is thus undesirable for cases
> where we simply wish to 'soft' poison a range.
>
> This is in preparation for implementing the ability to specify guard pages
> at the page table level, i.e. ranges that, when accessed, should cause
> process termination.
>
> Additionally, rename zap_drop_file_uffd_wp() to zap_drop_markers() - the
> function checks the ZAP_FLAG_DROP_MARKER flag so naming it for this single
> purpose was simply incorrect.
>
> We then reuse the same logic to determine whether a zap should clear a
> guard entry - this should only be performed on teardown and never on
> MADV_DONTNEED or MADV_FREE.
>
> We additionally add a WARN_ON_ONCE() in hugetlb logic should a guard marker
> be encountered there, as we explicitly do not support this operation and
> this should not occur.
>
> Acked-by: Vlastimil Babka <vbabka@...e.cz>
> Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@...cle.com>
> ---
> include/linux/mm_inline.h | 2 +-
> include/linux/swapops.h | 24 +++++++++++++++++++++++-
> mm/hugetlb.c | 4 ++++
> mm/memory.c | 18 +++++++++++++++---
> mm/mprotect.c | 6 ++++--
> 5 files changed, 47 insertions(+), 7 deletions(-)
>
> diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
> index 355cf46a01a6..1b6a917fffa4 100644
> --- a/include/linux/mm_inline.h
> +++ b/include/linux/mm_inline.h
> @@ -544,7 +544,7 @@ static inline pte_marker copy_pte_marker(
> {
> pte_marker srcm = pte_marker_get(entry);
> /* Always copy error entries. */
> - pte_marker dstm = srcm & PTE_MARKER_POISONED;
> + pte_marker dstm = srcm & (PTE_MARKER_POISONED | PTE_MARKER_GUARD);
>
> /* Only copy PTE markers if UFFD register matches. */
> if ((srcm & PTE_MARKER_UFFD_WP) && userfaultfd_wp(dst_vma))
> diff --git a/include/linux/swapops.h b/include/linux/swapops.h
> index cb468e418ea1..96f26e29fefe 100644
> --- a/include/linux/swapops.h
> +++ b/include/linux/swapops.h
> @@ -426,9 +426,19 @@ typedef unsigned long pte_marker;
> * "Poisoned" here is meant in the very general sense of "future accesses are
> * invalid", instead of referring very specifically to hardware memory errors.
> * This marker is meant to represent any of various different causes of this.
> + *
> + * Note that, when encountered by the faulting logic, PTEs with this marker will
> + * result in VM_FAULT_HWPOISON and will thus trigger the hardware memory error
> + * handling logic regardless.
> */
> #define PTE_MARKER_POISONED BIT(1)
> -#define PTE_MARKER_MASK (BIT(2) - 1)
> +/*
> + * Indicates that, on fault, this PTE will cause a SIGSEGV signal to be
> + * sent. This means guard markers behave in effect as if the region were mapped
> + * PROT_NONE, rather than as if they were a memory hole or equivalent.
> + */
> +#define PTE_MARKER_GUARD BIT(2)
> +#define PTE_MARKER_MASK (BIT(3) - 1)
>
> static inline swp_entry_t make_pte_marker_entry(pte_marker marker)
> {
> @@ -464,6 +474,18 @@ static inline int is_poisoned_swp_entry(swp_entry_t entry)
> {
> return is_pte_marker_entry(entry) &&
> (pte_marker_get(entry) & PTE_MARKER_POISONED);
> +
> +}
> +
> +static inline swp_entry_t make_guard_swp_entry(void)
> +{
> + return make_pte_marker_entry(PTE_MARKER_GUARD);
> +}
> +
> +static inline int is_guard_swp_entry(swp_entry_t entry)
> +{
> + return is_pte_marker_entry(entry) &&
> + (pte_marker_get(entry) & PTE_MARKER_GUARD);
> }
>
> /*
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index 906294ac85dc..2c8c5da0f5d3 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -6353,6 +6353,10 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
> ret = VM_FAULT_HWPOISON_LARGE |
> VM_FAULT_SET_HINDEX(hstate_index(h));
> goto out_mutex;
> + } else if (WARN_ON_ONCE(marker & PTE_MARKER_GUARD)) {
> + /* This isn't supported in hugetlb. */
> + ret = VM_FAULT_SIGSEGV;
> + goto out_mutex;
> }
> }
>
> diff --git a/mm/memory.c b/mm/memory.c
> index 0f614523b9f4..551455cd453f 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -1455,7 +1455,7 @@ static inline bool should_zap_folio(struct zap_details *details,
> return !folio_test_anon(folio);
> }
>
> -static inline bool zap_drop_file_uffd_wp(struct zap_details *details)
> +static inline bool zap_drop_markers(struct zap_details *details)
> {
> if (!details)
> return false;
> @@ -1476,7 +1476,7 @@ zap_install_uffd_wp_if_needed(struct vm_area_struct *vma,
> if (vma_is_anonymous(vma))
> return;
>
> - if (zap_drop_file_uffd_wp(details))
> + if (zap_drop_markers(details))
> return;
>
> for (;;) {
> @@ -1671,7 +1671,15 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
> * drop the marker if explicitly requested.
> */
> if (!vma_is_anonymous(vma) &&
> - !zap_drop_file_uffd_wp(details))
> + !zap_drop_markers(details))
> + continue;
> + } else if (is_guard_swp_entry(entry)) {
> + /*
> + * Ordinary zapping should not remove guard PTE
> + * markers. Only do so if we should remove PTE markers
> + * in general.
> + */
> + if (!zap_drop_markers(details))
> continue;
> } else if (is_hwpoison_entry(entry) ||
> is_poisoned_swp_entry(entry)) {
> @@ -4003,6 +4011,10 @@ static vm_fault_t handle_pte_marker(struct vm_fault *vmf)
> if (marker & PTE_MARKER_POISONED)
> return VM_FAULT_HWPOISON;
>
> + /* Hitting a guard page is always a fatal condition. */
> + if (marker & PTE_MARKER_GUARD)
> + return VM_FAULT_SIGSEGV;
> +
> if (pte_marker_entry_uffd_wp(entry))
> return pte_marker_handle_uffd_wp(vmf);
>
> diff --git a/mm/mprotect.c b/mm/mprotect.c
> index 0c5d6d06107d..1f671b0667bd 100644
> --- a/mm/mprotect.c
> +++ b/mm/mprotect.c
> @@ -236,9 +236,11 @@ static long change_pte_range(struct mmu_gather *tlb,
> } else if (is_pte_marker_entry(entry)) {
> /*
> * Ignore error swap entries unconditionally,
> - * because any access should sigbus anyway.
> + * because any access should sigbus/sigsegv
> + * anyway.
> */
> - if (is_poisoned_swp_entry(entry))
> + if (is_poisoned_swp_entry(entry) ||
> + is_guard_swp_entry(entry))
> continue;
> /*
> * If this is uffd-wp pte marker and we'd like
Acked-by: Jarkko Sakkinen <jarkko@...nel.org>
BR, Jarkko
Powered by blists - more mailing lists