lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Tue, 25 Feb 2020 12:12:52 -0800
From:   Kees Cook <keescook@...omium.org>
To:     Yu-cheng Yu <yu-cheng.yu@...el.com>
Cc:     x86@...nel.org, "H. Peter Anvin" <hpa@...or.com>,
        Thomas Gleixner <tglx@...utronix.de>,
        Ingo Molnar <mingo@...hat.com>, linux-kernel@...r.kernel.org,
        linux-doc@...r.kernel.org, linux-mm@...ck.org,
        linux-arch@...r.kernel.org, linux-api@...r.kernel.org,
        Arnd Bergmann <arnd@...db.de>,
        Andy Lutomirski <luto@...nel.org>,
        Balbir Singh <bsingharora@...il.com>,
        Borislav Petkov <bp@...en8.de>,
        Cyrill Gorcunov <gorcunov@...il.com>,
        Dave Hansen <dave.hansen@...ux.intel.com>,
        Eugene Syromiatnikov <esyr@...hat.com>,
        Florian Weimer <fweimer@...hat.com>,
        "H.J. Lu" <hjl.tools@...il.com>, Jann Horn <jannh@...gle.com>,
        Jonathan Corbet <corbet@....net>,
        Mike Kravetz <mike.kravetz@...cle.com>,
        Nadav Amit <nadav.amit@...il.com>,
        Oleg Nesterov <oleg@...hat.com>, Pavel Machek <pavel@....cz>,
        Peter Zijlstra <peterz@...radead.org>,
        Randy Dunlap <rdunlap@...radead.org>,
        "Ravi V. Shankar" <ravi.v.shankar@...el.com>,
        Vedvyas Shanbhogue <vedvyas.shanbhogue@...el.com>,
        Dave Martin <Dave.Martin@....com>, x86-patch-review@...el.com
Subject: Re: [RFC PATCH v9 09/27] x86/mm: Introduce _PAGE_DIRTY_SW

On Wed, Feb 05, 2020 at 10:19:17AM -0800, Yu-cheng Yu wrote:
> When Shadow Stack (SHSTK) is introduced, a R/O and Dirty PTE exists in the
> following cases:
> 
> (a) A modified, copy-on-write (COW) page;
> (b) A R/O page that has been COW'ed;
> (c) A SHSTK page.
> 
> To separate non-SHSTK memory from SHSTK, introduce a spare bit of the
> 64-bit PTE as _PAGE_BIT_DIRTY_SW and use that for case (a) and (b).
> This results in the following possible settings:
> 
> Modified PTE:         (R/W + DIRTY_HW)
> Modified and COW PTE: (R/O + DIRTY_SW)
> R/O PTE COW'ed:       (R/O + DIRTY_SW)
> SHSTK PTE:            (R/O + DIRTY_HW)
> SHSTK shared PTE[1]:  (R/O + DIRTY_SW)
> SHSTK PTE COW'ed:     (R/O + DIRTY_HW)
> 
> [1] When a SHSTK page is being shared among threads, its PTE is cleared of
>     _PAGE_DIRTY_HW, so the next SHSTK access causes a fault, and the page
>     is duplicated and _PAGE_DIRTY_HW is set again.
> 
> With this, in pte_wrprotect(), if SHSTK is active, use _PAGE_DIRTY_SW for
> the Dirty bit, and in pte_mkwrite() use _PAGE_DIRTY_HW.  The same changes
> apply to pmd and pud.
> 
> When this patch is applied, there are six free bits left in the 64-bit PTE.
> There are no more free bits in the 32-bit PTE (except for PAE) and SHSTK is
> not implemented for the 32-bit kernel.
> 
> v9:
> - Remove pte_move_flags() etc. and put the logic directly in
>   pte_wrprotect()/pte_mkwrite() etc.
> - Change compile-time conditionals to run-time checks.
> - Split out pte_modify()/pmd_modify() to a new patch.
> - Update comments.
> 
> Signed-off-by: Yu-cheng Yu <yu-cheng.yu@...el.com>

Reviewed-by: Kees Cook <keescook@...omium.org>

-Kees

> ---
>  arch/x86/include/asm/pgtable.h       | 111 ++++++++++++++++++++++++---
>  arch/x86/include/asm/pgtable_types.h |  31 +++++++-
>  2 files changed, 131 insertions(+), 11 deletions(-)
> 
> diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
> index ab50d25f9afc..62aeb118bc36 100644
> --- a/arch/x86/include/asm/pgtable.h
> +++ b/arch/x86/include/asm/pgtable.h
> @@ -120,9 +120,9 @@ extern pmdval_t early_pmd_flags;
>   * The following only work if pte_present() is true.
>   * Undefined behaviour if not..
>   */
> -static inline int pte_dirty(pte_t pte)
> +static inline bool pte_dirty(pte_t pte)
>  {
> -	return pte_flags(pte) & _PAGE_DIRTY_HW;
> +	return pte_flags(pte) & _PAGE_DIRTY_BITS;
>  }
>  
>  
> @@ -159,9 +159,9 @@ static inline int pte_young(pte_t pte)
>  	return pte_flags(pte) & _PAGE_ACCESSED;
>  }
>  
> -static inline int pmd_dirty(pmd_t pmd)
> +static inline bool pmd_dirty(pmd_t pmd)
>  {
> -	return pmd_flags(pmd) & _PAGE_DIRTY_HW;
> +	return pmd_flags(pmd) & _PAGE_DIRTY_BITS;
>  }
>  
>  static inline int pmd_young(pmd_t pmd)
> @@ -169,9 +169,9 @@ static inline int pmd_young(pmd_t pmd)
>  	return pmd_flags(pmd) & _PAGE_ACCESSED;
>  }
>  
> -static inline int pud_dirty(pud_t pud)
> +static inline bool pud_dirty(pud_t pud)
>  {
> -	return pud_flags(pud) & _PAGE_DIRTY_HW;
> +	return pud_flags(pud) & _PAGE_DIRTY_BITS;
>  }
>  
>  static inline int pud_young(pud_t pud)
> @@ -312,7 +312,7 @@ static inline pte_t pte_clear_flags(pte_t pte, pteval_t clear)
>  
>  static inline pte_t pte_mkclean(pte_t pte)
>  {
> -	return pte_clear_flags(pte, _PAGE_DIRTY_HW);
> +	return pte_clear_flags(pte, _PAGE_DIRTY_BITS);
>  }
>  
>  static inline pte_t pte_mkold(pte_t pte)
> @@ -322,6 +322,17 @@ static inline pte_t pte_mkold(pte_t pte)
>  
>  static inline pte_t pte_wrprotect(pte_t pte)
>  {
> +	/*
> +	 * Use _PAGE_DIRTY_SW on a R/O PTE to set it apart from
> +	 * a Shadow Stack PTE, which is R/O + _PAGE_DIRTY_HW.
> +	 */
> +	if (static_cpu_has(X86_FEATURE_SHSTK)) {
> +		if (pte_flags(pte) & _PAGE_DIRTY_HW) {
> +			pte = pte_clear_flags(pte, _PAGE_DIRTY_HW);
> +			pte = pte_set_flags(pte, _PAGE_DIRTY_SW);
> +		}
> +	}
> +
>  	return pte_clear_flags(pte, _PAGE_RW);
>  }
>  
> @@ -332,9 +343,25 @@ static inline pte_t pte_mkexec(pte_t pte)
>  
>  static inline pte_t pte_mkdirty(pte_t pte)
>  {
> +	pteval_t dirty = _PAGE_DIRTY_HW;
> +
> +	if (static_cpu_has(X86_FEATURE_SHSTK) && !pte_write(pte))
> +		dirty = _PAGE_DIRTY_SW;
> +
> +	return pte_set_flags(pte, dirty | _PAGE_SOFT_DIRTY);
> +}
> +
> +static inline pte_t pte_mkdirty_shstk(pte_t pte)
> +{
> +	pte = pte_clear_flags(pte, _PAGE_DIRTY_SW);
>  	return pte_set_flags(pte, _PAGE_DIRTY_HW | _PAGE_SOFT_DIRTY);
>  }
>  
> +static inline bool pte_dirty_hw(pte_t pte)
> +{
> +	return pte_flags(pte) & _PAGE_DIRTY_HW;
> +}
> +
>  static inline pte_t pte_mkyoung(pte_t pte)
>  {
>  	return pte_set_flags(pte, _PAGE_ACCESSED);
> @@ -342,6 +369,13 @@ static inline pte_t pte_mkyoung(pte_t pte)
>  
>  static inline pte_t pte_mkwrite(pte_t pte)
>  {
> +	if (static_cpu_has(X86_FEATURE_SHSTK)) {
> +		if (pte_flags(pte) & _PAGE_DIRTY_SW) {
> +			pte = pte_clear_flags(pte, _PAGE_DIRTY_SW);
> +			pte = pte_set_flags(pte, _PAGE_DIRTY_HW);
> +		}
> +	}
> +
>  	return pte_set_flags(pte, _PAGE_RW);
>  }
>  
> @@ -396,19 +430,46 @@ static inline pmd_t pmd_mkold(pmd_t pmd)
>  
>  static inline pmd_t pmd_mkclean(pmd_t pmd)
>  {
> -	return pmd_clear_flags(pmd, _PAGE_DIRTY_HW);
> +	return pmd_clear_flags(pmd, _PAGE_DIRTY_BITS);
>  }
>  
>  static inline pmd_t pmd_wrprotect(pmd_t pmd)
>  {
> +	/*
> +	 * Use _PAGE_DIRTY_SW on a R/O PMD to set it apart from
> +	 * a Shadow Stack PTE, which is R/O + _PAGE_DIRTY_HW.
> +	 */
> +	if (static_cpu_has(X86_FEATURE_SHSTK)) {
> +		if (pmd_flags(pmd) & _PAGE_DIRTY_HW) {
> +			pmd = pmd_clear_flags(pmd, _PAGE_DIRTY_HW);
> +			pmd = pmd_set_flags(pmd, _PAGE_DIRTY_SW);
> +		}
> +	}
> +
>  	return pmd_clear_flags(pmd, _PAGE_RW);
>  }
>  
>  static inline pmd_t pmd_mkdirty(pmd_t pmd)
>  {
> +	pmdval_t dirty = _PAGE_DIRTY_HW;
> +
> +	if (static_cpu_has(X86_FEATURE_SHSTK) && !(pmd_flags(pmd) & _PAGE_RW))
> +		dirty = _PAGE_DIRTY_SW;
> +
> +	return pmd_set_flags(pmd, dirty | _PAGE_SOFT_DIRTY);
> +}
> +
> +static inline pmd_t pmd_mkdirty_shstk(pmd_t pmd)
> +{
> +	pmd = pmd_clear_flags(pmd, _PAGE_DIRTY_SW);
>  	return pmd_set_flags(pmd, _PAGE_DIRTY_HW | _PAGE_SOFT_DIRTY);
>  }
>  
> +static inline bool pmd_dirty_hw(pmd_t pmd)
> +{
> +	return  pmd_flags(pmd) & _PAGE_DIRTY_HW;
> +}
> +
>  static inline pmd_t pmd_mkdevmap(pmd_t pmd)
>  {
>  	return pmd_set_flags(pmd, _PAGE_DEVMAP);
> @@ -426,6 +487,13 @@ static inline pmd_t pmd_mkyoung(pmd_t pmd)
>  
>  static inline pmd_t pmd_mkwrite(pmd_t pmd)
>  {
> +	if (static_cpu_has(X86_FEATURE_SHSTK)) {
> +		if (pmd_flags(pmd) & _PAGE_DIRTY_SW) {
> +			pmd = pmd_clear_flags(pmd, _PAGE_DIRTY_SW);
> +			pmd = pmd_set_flags(pmd, _PAGE_DIRTY_HW);
> +		}
> +	}
> +
>  	return pmd_set_flags(pmd, _PAGE_RW);
>  }
>  
> @@ -450,17 +518,33 @@ static inline pud_t pud_mkold(pud_t pud)
>  
>  static inline pud_t pud_mkclean(pud_t pud)
>  {
> -	return pud_clear_flags(pud, _PAGE_DIRTY_HW);
> +	return pud_clear_flags(pud, _PAGE_DIRTY_BITS);
>  }
>  
>  static inline pud_t pud_wrprotect(pud_t pud)
>  {
> +	/*
> +	 * Use _PAGE_DIRTY_SW on a R/O PUD to set it apart from
> +	 * a Shadow Stack PTE, which is R/O + _PAGE_DIRTY_HW.
> +	 */
> +	if (static_cpu_has(X86_FEATURE_SHSTK)) {
> +		if (pud_flags(pud) & _PAGE_DIRTY_HW) {
> +			pud = pud_clear_flags(pud, _PAGE_DIRTY_HW);
> +			pud = pud_set_flags(pud, _PAGE_DIRTY_SW);
> +		}
> +	}
> +
>  	return pud_clear_flags(pud, _PAGE_RW);
>  }
>  
>  static inline pud_t pud_mkdirty(pud_t pud)
>  {
> -	return pud_set_flags(pud, _PAGE_DIRTY_HW | _PAGE_SOFT_DIRTY);
> +	pudval_t dirty = _PAGE_DIRTY_HW;
> +
> +	if (static_cpu_has(X86_FEATURE_SHSTK) && !(pud_flags(pud) & _PAGE_RW))
> +		dirty = _PAGE_DIRTY_SW;
> +
> +	return pud_set_flags(pud, dirty | _PAGE_SOFT_DIRTY);
>  }
>  
>  static inline pud_t pud_mkdevmap(pud_t pud)
> @@ -480,6 +564,13 @@ static inline pud_t pud_mkyoung(pud_t pud)
>  
>  static inline pud_t pud_mkwrite(pud_t pud)
>  {
> +	if (static_cpu_has(X86_FEATURE_SHSTK)) {
> +		if (pud_flags(pud) & _PAGE_DIRTY_SW) {
> +			pud = pud_clear_flags(pud, _PAGE_DIRTY_SW);
> +			pud = pud_set_flags(pud, _PAGE_DIRTY_HW);
> +		}
> +	}
> +
>  	return pud_set_flags(pud, _PAGE_RW);
>  }
>  
> diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
> index e647e3c75578..826823df917f 100644
> --- a/arch/x86/include/asm/pgtable_types.h
> +++ b/arch/x86/include/asm/pgtable_types.h
> @@ -23,7 +23,8 @@
>  #define _PAGE_BIT_SOFTW2	10	/* " */
>  #define _PAGE_BIT_SOFTW3	11	/* " */
>  #define _PAGE_BIT_PAT_LARGE	12	/* On 2MB or 1GB pages */
> -#define _PAGE_BIT_SOFTW4	58	/* available for programmer */
> +#define _PAGE_BIT_SOFTW4	57	/* available for programmer */
> +#define _PAGE_BIT_SOFTW5	58	/* available for programmer */
>  #define _PAGE_BIT_PKEY_BIT0	59	/* Protection Keys, bit 1/4 */
>  #define _PAGE_BIT_PKEY_BIT1	60	/* Protection Keys, bit 2/4 */
>  #define _PAGE_BIT_PKEY_BIT2	61	/* Protection Keys, bit 3/4 */
> @@ -35,6 +36,12 @@
>  #define _PAGE_BIT_SOFT_DIRTY	_PAGE_BIT_SOFTW3 /* software dirty tracking */
>  #define _PAGE_BIT_DEVMAP	_PAGE_BIT_SOFTW4
>  
> +/*
> + * This bit indicates a copy-on-write page, and is different from
> + * _PAGE_BIT_SOFT_DIRTY, which tracks which pages a task writes to.
> + */
> +#define _PAGE_BIT_DIRTY_SW	_PAGE_BIT_SOFTW5 /* was written to */
> +
>  /* If _PAGE_BIT_PRESENT is clear, we use these: */
>  /* - if the user mapped it with PROT_NONE; pte_present gives true */
>  #define _PAGE_BIT_PROTNONE	_PAGE_BIT_GLOBAL
> @@ -108,6 +115,28 @@
>  #define _PAGE_DEVMAP	(_AT(pteval_t, 0))
>  #endif
>  
> +/* A R/O and dirty PTE exists in the following cases:
> + *	(a) A modified, copy-on-write (COW) page;
> + *	(b) A R/O page that has been COW'ed;
> + *	(c) A SHSTK page.
> + * _PAGE_DIRTY_SW is used to separate case (c) from others.
> + * This results in the following settings:
> + *
> + *	Modified PTE:         (R/W + DIRTY_HW)
> + *	Modified and COW PTE: (R/O + DIRTY_SW)
> + *	R/O PTE COW'ed:       (R/O + DIRTY_SW)
> + *	SHSTK PTE:            (R/O + DIRTY_HW)
> + *	SHSTK PTE COW'ed:     (R/O + DIRTY_HW)
> + *	SHSTK PTE being shared among threads: (R/O + DIRTY_SW)
> + */
> +#ifdef CONFIG_X86_INTEL_SHADOW_STACK_USER
> +#define _PAGE_DIRTY_SW	(_AT(pteval_t, 1) << _PAGE_BIT_DIRTY_SW)
> +#else
> +#define _PAGE_DIRTY_SW	(_AT(pteval_t, 0))
> +#endif
> +
> +#define _PAGE_DIRTY_BITS (_PAGE_DIRTY_HW | _PAGE_DIRTY_SW)
> +
>  #define _PAGE_PROTNONE	(_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE)
>  
>  #define _PAGE_TABLE_NOENC	(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER |\
> -- 
> 2.21.0
> 

-- 
Kees Cook

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ