lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20190221172050.GH2813@redhat.com>
Date:   Thu, 21 Feb 2019 12:20:50 -0500
From:   Jerome Glisse <jglisse@...hat.com>
To:     Peter Xu <peterx@...hat.com>
Cc:     linux-mm@...ck.org, linux-kernel@...r.kernel.org,
        David Hildenbrand <david@...hat.com>,
        Hugh Dickins <hughd@...gle.com>,
        Maya Gokhale <gokhale2@...l.gov>,
        Pavel Emelyanov <xemul@...tuozzo.com>,
        Johannes Weiner <hannes@...xchg.org>,
        Martin Cracauer <cracauer@...s.org>, Shaohua Li <shli@...com>,
        Marty McFadden <mcfadden8@...l.gov>,
        Andrea Arcangeli <aarcange@...hat.com>,
        Mike Kravetz <mike.kravetz@...cle.com>,
        Denis Plotnikov <dplotnikov@...tuozzo.com>,
        Mike Rapoport <rppt@...ux.vnet.ibm.com>,
        Mel Gorman <mgorman@...e.de>,
        "Kirill A . Shutemov" <kirill@...temov.name>,
        "Dr . David Alan Gilbert" <dgilbert@...hat.com>
Subject: Re: [PATCH v2 08/26] userfaultfd: wp: add WP pagetable tracking to
 x86

On Tue, Feb 12, 2019 at 10:56:14AM +0800, Peter Xu wrote:
> From: Andrea Arcangeli <aarcange@...hat.com>
> 
> Accurate userfaultfd WP tracking is possible by tracking exactly which
> virtual memory ranges were writeprotected by userland. We can't rely
> only on the RW bit of the mapped pagetable because that information is
> destroyed by fork() or KSM or swap. If we were to rely on that, we'd
> need to stay on the safe side and generate false positive wp faults
> for every swapped out page.
> 
> Signed-off-by: Andrea Arcangeli <aarcange@...hat.com>
> Signed-off-by: Peter Xu <peterx@...hat.com>

So I thought about this some more, and the only alternative I see is
defining a new swap type to preserve the pte write bit when swapping,
and storing the original pte write bit within the ksm stable_node. This
would solve the false positives for swap and ksm.

But I do not see this as a better alternative to storing the wp status
as a bit in the pte. So:

Reviewed-by: Jérôme Glisse <jglisse@...hat.com>

> ---
>  arch/x86/Kconfig                     |  1 +
>  arch/x86/include/asm/pgtable.h       | 52 ++++++++++++++++++++++++++++
>  arch/x86/include/asm/pgtable_64.h    |  8 ++++-
>  arch/x86/include/asm/pgtable_types.h |  9 +++++
>  include/asm-generic/pgtable.h        |  1 +
>  include/asm-generic/pgtable_uffd.h   | 51 +++++++++++++++++++++++++++
>  init/Kconfig                         |  5 +++
>  7 files changed, 126 insertions(+), 1 deletion(-)
>  create mode 100644 include/asm-generic/pgtable_uffd.h
> 
> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> index 68261430fe6e..cb43bc008675 100644
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -209,6 +209,7 @@ config X86
>  	select USER_STACKTRACE_SUPPORT
>  	select VIRT_TO_BUS
>  	select X86_FEATURE_NAMES		if PROC_FS
> +	select HAVE_ARCH_USERFAULTFD_WP		if USERFAULTFD
>  
>  config INSTRUCTION_DECODER
>  	def_bool y
> diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
> index 2779ace16d23..6863236e8484 100644
> --- a/arch/x86/include/asm/pgtable.h
> +++ b/arch/x86/include/asm/pgtable.h
> @@ -23,6 +23,7 @@
>  
>  #ifndef __ASSEMBLY__
>  #include <asm/x86_init.h>
> +#include <asm-generic/pgtable_uffd.h>
>  
>  extern pgd_t early_top_pgt[PTRS_PER_PGD];
>  int __init __early_make_pgtable(unsigned long address, pmdval_t pmd);
> @@ -293,6 +294,23 @@ static inline pte_t pte_clear_flags(pte_t pte, pteval_t clear)
>  	return native_make_pte(v & ~clear);
>  }
>  
> +#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
> +static inline int pte_uffd_wp(pte_t pte)
> +{
> +	return pte_flags(pte) & _PAGE_UFFD_WP;
> +}
> +
> +static inline pte_t pte_mkuffd_wp(pte_t pte)
> +{
> +	return pte_set_flags(pte, _PAGE_UFFD_WP);
> +}
> +
> +static inline pte_t pte_clear_uffd_wp(pte_t pte)
> +{
> +	return pte_clear_flags(pte, _PAGE_UFFD_WP);
> +}
> +#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */
> +
>  static inline pte_t pte_mkclean(pte_t pte)
>  {
>  	return pte_clear_flags(pte, _PAGE_DIRTY);
> @@ -372,6 +390,23 @@ static inline pmd_t pmd_clear_flags(pmd_t pmd, pmdval_t clear)
>  	return native_make_pmd(v & ~clear);
>  }
>  
> +#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
> +static inline int pmd_uffd_wp(pmd_t pmd)
> +{
> +	return pmd_flags(pmd) & _PAGE_UFFD_WP;
> +}
> +
> +static inline pmd_t pmd_mkuffd_wp(pmd_t pmd)
> +{
> +	return pmd_set_flags(pmd, _PAGE_UFFD_WP);
> +}
> +
> +static inline pmd_t pmd_clear_uffd_wp(pmd_t pmd)
> +{
> +	return pmd_clear_flags(pmd, _PAGE_UFFD_WP);
> +}
> +#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */
> +
>  static inline pmd_t pmd_mkold(pmd_t pmd)
>  {
>  	return pmd_clear_flags(pmd, _PAGE_ACCESSED);
> @@ -1351,6 +1386,23 @@ static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd)
>  #endif
>  #endif
>  
> +#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
> +static inline pte_t pte_swp_mkuffd_wp(pte_t pte)
> +{
> +	return pte_set_flags(pte, _PAGE_SWP_UFFD_WP);
> +}
> +
> +static inline int pte_swp_uffd_wp(pte_t pte)
> +{
> +	return pte_flags(pte) & _PAGE_SWP_UFFD_WP;
> +}
> +
> +static inline pte_t pte_swp_clear_uffd_wp(pte_t pte)
> +{
> +	return pte_clear_flags(pte, _PAGE_SWP_UFFD_WP);
> +}
> +#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */
> +
>  #define PKRU_AD_BIT 0x1
>  #define PKRU_WD_BIT 0x2
>  #define PKRU_BITS_PER_PKEY 2
> diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
> index 9c85b54bf03c..e0c5d29b8685 100644
> --- a/arch/x86/include/asm/pgtable_64.h
> +++ b/arch/x86/include/asm/pgtable_64.h
> @@ -189,7 +189,7 @@ extern void sync_global_pgds(unsigned long start, unsigned long end);
>   *
>   * |     ...            | 11| 10|  9|8|7|6|5| 4| 3|2| 1|0| <- bit number
>   * |     ...            |SW3|SW2|SW1|G|L|D|A|CD|WT|U| W|P| <- bit names
> - * | TYPE (59-63) | ~OFFSET (9-58)  |0|0|X|X| X| X|X|SD|0| <- swp entry
> + * | TYPE (59-63) | ~OFFSET (9-58)  |0|0|X|X| X| X|F|SD|0| <- swp entry
>   *
>   * G (8) is aliased and used as a PROT_NONE indicator for
>   * !present ptes.  We need to start storing swap entries above
> @@ -197,9 +197,15 @@ extern void sync_global_pgds(unsigned long start, unsigned long end);
>   * erratum where they can be incorrectly set by hardware on
>   * non-present PTEs.
>   *
> + * Bits 1-4 are not used in non-present format and are available for
> + * special use described below:
> + *
>   * SD (1) in swp entry is used to store soft dirty bit, which helps us
>   * remember soft dirty over page migration
>   *
> + * F (2) in swp entry is used to record when a pagetable is
> + * writeprotected by userfaultfd WP support.
> + *
>   * Bit 7 in swp entry should be 0 because pmd_present checks not only P,
>   * but also L and G.
>   *
> diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
> index d6ff0bbdb394..8cebcff91e57 100644
> --- a/arch/x86/include/asm/pgtable_types.h
> +++ b/arch/x86/include/asm/pgtable_types.h
> @@ -32,6 +32,7 @@
>  
>  #define _PAGE_BIT_SPECIAL	_PAGE_BIT_SOFTW1
>  #define _PAGE_BIT_CPA_TEST	_PAGE_BIT_SOFTW1
> +#define _PAGE_BIT_UFFD_WP	_PAGE_BIT_SOFTW2 /* userfaultfd wrprotected */
>  #define _PAGE_BIT_SOFT_DIRTY	_PAGE_BIT_SOFTW3 /* software dirty tracking */
>  #define _PAGE_BIT_DEVMAP	_PAGE_BIT_SOFTW4
>  
> @@ -100,6 +101,14 @@
>  #define _PAGE_SWP_SOFT_DIRTY	(_AT(pteval_t, 0))
>  #endif
>  
> +#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
> +#define _PAGE_UFFD_WP		(_AT(pteval_t, 1) << _PAGE_BIT_UFFD_WP)
> +#define _PAGE_SWP_UFFD_WP	_PAGE_USER
> +#else
> +#define _PAGE_UFFD_WP		(_AT(pteval_t, 0))
> +#define _PAGE_SWP_UFFD_WP	(_AT(pteval_t, 0))
> +#endif
> +
>  #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
>  #define _PAGE_NX	(_AT(pteval_t, 1) << _PAGE_BIT_NX)
>  #define _PAGE_DEVMAP	(_AT(u64, 1) << _PAGE_BIT_DEVMAP)
> diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
> index 05e61e6c843f..f49afe951711 100644
> --- a/include/asm-generic/pgtable.h
> +++ b/include/asm-generic/pgtable.h
> @@ -10,6 +10,7 @@
>  #include <linux/mm_types.h>
>  #include <linux/bug.h>
>  #include <linux/errno.h>
> +#include <asm-generic/pgtable_uffd.h>
>  
>  #if 5 - defined(__PAGETABLE_P4D_FOLDED) - defined(__PAGETABLE_PUD_FOLDED) - \
>  	defined(__PAGETABLE_PMD_FOLDED) != CONFIG_PGTABLE_LEVELS
> diff --git a/include/asm-generic/pgtable_uffd.h b/include/asm-generic/pgtable_uffd.h
> new file mode 100644
> index 000000000000..643d1bf559c2
> --- /dev/null
> +++ b/include/asm-generic/pgtable_uffd.h
> @@ -0,0 +1,51 @@
> +#ifndef _ASM_GENERIC_PGTABLE_UFFD_H
> +#define _ASM_GENERIC_PGTABLE_UFFD_H
> +
> +#ifndef CONFIG_HAVE_ARCH_USERFAULTFD_WP
> +static __always_inline int pte_uffd_wp(pte_t pte)
> +{
> +	return 0;
> +}
> +
> +static __always_inline int pmd_uffd_wp(pmd_t pmd)
> +{
> +	return 0;
> +}
> +
> +static __always_inline pte_t pte_mkuffd_wp(pte_t pte)
> +{
> +	return pte;
> +}
> +
> +static __always_inline pmd_t pmd_mkuffd_wp(pmd_t pmd)
> +{
> +	return pmd;
> +}
> +
> +static __always_inline pte_t pte_clear_uffd_wp(pte_t pte)
> +{
> +	return pte;
> +}
> +
> +static __always_inline pmd_t pmd_clear_uffd_wp(pmd_t pmd)
> +{
> +	return pmd;
> +}
> +
> +static __always_inline pte_t pte_swp_mkuffd_wp(pte_t pte)
> +{
> +	return pte;
> +}
> +
> +static __always_inline int pte_swp_uffd_wp(pte_t pte)
> +{
> +	return 0;
> +}
> +
> +static __always_inline pte_t pte_swp_clear_uffd_wp(pte_t pte)
> +{
> +	return pte;
> +}
> +#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */
> +
> +#endif /* _ASM_GENERIC_PGTABLE_UFFD_H */
> diff --git a/init/Kconfig b/init/Kconfig
> index c9386a365eea..892d61ddf2eb 100644
> --- a/init/Kconfig
> +++ b/init/Kconfig
> @@ -1424,6 +1424,11 @@ config ADVISE_SYSCALLS
>  	  applications use these syscalls, you can disable this option to save
>  	  space.
>  
> +config HAVE_ARCH_USERFAULTFD_WP
> +	bool
> +	help
> +	  Arch has userfaultfd write protection support
> +
>  config MEMBARRIER
>  	bool "Enable membarrier() system call" if EXPERT
>  	default y
> -- 
> 2.17.1
> 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ