lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <87bkx7ayfs.fsf@nvdebian.thelocal>
Date:   Tue, 12 Apr 2022 11:07:56 +1000
From:   Alistair Popple <apopple@...dia.com>
To:     Peter Xu <peterx@...hat.com>
Cc:     linux-kernel@...r.kernel.org, linux-mm@...ck.org,
        Mike Kravetz <mike.kravetz@...cle.com>,
        Andrew Morton <akpm@...ux-foundation.org>,
        David Hildenbrand <david@...hat.com>,
        Matthew Wilcox <willy@...radead.org>,
        Nadav Amit <nadav.amit@...il.com>,
        Axel Rasmussen <axelrasmussen@...gle.com>,
        Andrea Arcangeli <aarcange@...hat.com>,
        "Kirill A . Shutemov" <kirill@...temov.name>,
        Hugh Dickins <hughd@...gle.com>,
        Jerome Glisse <jglisse@...hat.com>,
        Mike Rapoport <rppt@...ux.vnet.ibm.com>
Subject: Re: [PATCH v8 01/23] mm: Introduce PTE_MARKER swap entry

Hi Peter,

I noticed this while reviewing the next patch in the series. I think you need to
add CONFIG_PTE_MARKER to the below as well:

/*
 * non_swap_entry() - tell whether @entry's type is at or above MAX_SWAPFILES.
 *
 * Two variants are provided.  When at least one of CONFIG_MEMORY_FAILURE,
 * CONFIG_MIGRATION or CONFIG_DEVICE_PRIVATE is set, the entry's type is
 * compared against MAX_SWAPFILES.  When none of them is set, the stub always
 * returns 0, i.e. every entry is reported as not being a non-swap entry.
 */
#if defined(CONFIG_MEMORY_FAILURE) || defined(CONFIG_MIGRATION) || \
    defined(CONFIG_DEVICE_PRIVATE)
static inline int non_swap_entry(swp_entry_t entry)
{
	/* Nonzero when the type is not below MAX_SWAPFILES. */
	return swp_type(entry) >= MAX_SWAPFILES;
}
#else
static inline int non_swap_entry(swp_entry_t entry)
{
	/* None of the configs in the guard above is enabled. */
	return 0;
}
#endif

Otherwise marker entries will be treated as swap entries, which is wrong, for
example, in swapin_walk_pmd_entry(), as marker entries are no longer considered
pte_none().

- Alistair

Peter Xu <peterx@...hat.com> writes:

> This patch introduces a new swap entry type called PTE_MARKER.  It can be
> installed for any pte that maps a file-backed memory when the pte is
> temporarily zapped, so as to maintain per-pte information.
>
> The information that is kept in the pte is called a "marker".  Here we define the
> marker as "unsigned long" just to match pgoff_t, however it will only work if
> it still fits in swp_offset(), which is e.g. currently 58 bits on x86_64.
>
> A new config CONFIG_PTE_MARKER is introduced too; it's by default off.  A bunch
> of helpers are defined altogether to service the rest of the pte marker code.
>
> Signed-off-by: Peter Xu <peterx@...hat.com>
> ---
>  include/asm-generic/hugetlb.h |  9 ++++
>  include/linux/swap.h          | 15 ++++++-
>  include/linux/swapops.h       | 78 +++++++++++++++++++++++++++++++++++
>  mm/Kconfig                    |  6 +++
>  4 files changed, 107 insertions(+), 1 deletion(-)
>
> diff --git a/include/asm-generic/hugetlb.h b/include/asm-generic/hugetlb.h
> index 8e1e6244a89d..f39cad20ffc6 100644
> --- a/include/asm-generic/hugetlb.h
> +++ b/include/asm-generic/hugetlb.h
> @@ -2,6 +2,9 @@
>  #ifndef _ASM_GENERIC_HUGETLB_H
>  #define _ASM_GENERIC_HUGETLB_H
>
> +#include <linux/swap.h>
> +#include <linux/swapops.h>
> +
>  static inline pte_t mk_huge_pte(struct page *page, pgprot_t pgprot)
>  {
>  	return mk_pte(page, pgprot);
> @@ -80,6 +83,12 @@ static inline int huge_pte_none(pte_t pte)
>  }
>  #endif
>
> +/* Please refer to comments above pte_none_mostly() for the usage */
> +static inline int huge_pte_none_mostly(pte_t pte)
> +{
> +	return huge_pte_none(pte) || is_pte_marker(pte);
> +}
> +
>  #ifndef __HAVE_ARCH_HUGE_PTE_WRPROTECT
>  static inline pte_t huge_pte_wrprotect(pte_t pte)
>  {
> diff --git a/include/linux/swap.h b/include/linux/swap.h
> index 7daae5a4b3e1..5553189d0215 100644
> --- a/include/linux/swap.h
> +++ b/include/linux/swap.h
> @@ -55,6 +55,19 @@ static inline int current_is_kswapd(void)
>   * actions on faults.
>   */
>
> +/*
> + * PTE markers are used to persist information onto PTEs that are mapped with
> + * file-backed memories.  As its name "PTE" hints, it should only be applied to
> + * the leaves of pgtables.
> + */
> +#ifdef CONFIG_PTE_MARKER
> +#define SWP_PTE_MARKER_NUM 1
> +#define SWP_PTE_MARKER     (MAX_SWAPFILES + SWP_HWPOISON_NUM + \
> +			    SWP_MIGRATION_NUM + SWP_DEVICE_NUM)
> +#else
> +#define SWP_PTE_MARKER_NUM 0
> +#endif
> +
>  /*
>   * Unaddressable device memory support. See include/linux/hmm.h and
>   * Documentation/vm/hmm.rst. Short description is we need struct pages for
> @@ -107,7 +120,7 @@ static inline int current_is_kswapd(void)
>
>  #define MAX_SWAPFILES \
>  	((1 << MAX_SWAPFILES_SHIFT) - SWP_DEVICE_NUM - \
> -	SWP_MIGRATION_NUM - SWP_HWPOISON_NUM)
> +	SWP_MIGRATION_NUM - SWP_HWPOISON_NUM - SWP_PTE_MARKER_NUM)
>
>  /*
>   * Magic header for a swap area. The first part of the union is
> diff --git a/include/linux/swapops.h b/include/linux/swapops.h
> index 32d517a28969..7a00627845f0 100644
> --- a/include/linux/swapops.h
> +++ b/include/linux/swapops.h
> @@ -274,6 +274,84 @@ static inline int is_readable_migration_entry(swp_entry_t entry)
>
>  #endif
>
> +typedef unsigned long pte_marker;
> +
> +#define  PTE_MARKER_MASK     (0)
> +
> +#ifdef CONFIG_PTE_MARKER
> +
> +static inline swp_entry_t make_pte_marker_entry(pte_marker marker)
> +{
> +	return swp_entry(SWP_PTE_MARKER, marker);
> +}
> +
> +static inline bool is_pte_marker_entry(swp_entry_t entry)
> +{
> +	return swp_type(entry) == SWP_PTE_MARKER;
> +}
> +
> +static inline pte_marker pte_marker_get(swp_entry_t entry)
> +{
> +	return swp_offset(entry) & PTE_MARKER_MASK;
> +}
> +
> +static inline bool is_pte_marker(pte_t pte)
> +{
> +	return is_swap_pte(pte) && is_pte_marker_entry(pte_to_swp_entry(pte));
> +}
> +
> +#else /* CONFIG_PTE_MARKER */
> +
> +static inline swp_entry_t make_pte_marker_entry(pte_marker marker)
> +{
> +	/* This should never be called if !CONFIG_PTE_MARKER */
> +	WARN_ON_ONCE(1);
> +	return swp_entry(0, 0);
> +}
> +
> +static inline bool is_pte_marker_entry(swp_entry_t entry)
> +{
> +	return false;
> +}
> +
> +static inline pte_marker pte_marker_get(swp_entry_t entry)
> +{
> +	return 0;
> +}
> +
> +static inline bool is_pte_marker(pte_t pte)
> +{
> +	return false;
> +}
> +
> +#endif /* CONFIG_PTE_MARKER */
> +
> +static inline pte_t make_pte_marker(pte_marker marker)
> +{
> +	return swp_entry_to_pte(make_pte_marker_entry(marker));
> +}
> +
> +/*
> + * This is a special version to check pte_none() just to cover the case when
> + * the pte is a pte marker.  It exists because in many cases the pte marker
> + * should be seen as a none pte; it's just that we have stored some information
> + * onto the none pte so it is not none any more.
> + *
> + * It should be used when the pte is file-backed, ram-based and backing
> + * userspace pages, like shmem.  It is not needed upon pgtables that do not
> + * support pte markers at all.  For example, it's not needed on anonymous
> + * memory, kernel-only memory (including while the system is booting), or
> + * non-ram-based generic file systems.  It's fine to be used even there, but the
> + * extra pte marker check will be pure overhead.
> + *
> + * For systems configured with !CONFIG_PTE_MARKER this will be automatically
> + * optimized to pte_none().
> + */
> +static inline int pte_none_mostly(pte_t pte)
> +{
> +	return pte_none(pte) || is_pte_marker(pte);
> +}
> +
>  static inline struct page *pfn_swap_entry_to_page(swp_entry_t entry)
>  {
>  	struct page *p = pfn_to_page(swp_offset(entry));
> diff --git a/mm/Kconfig b/mm/Kconfig
> index 034d87953600..a1688b9314b2 100644
> --- a/mm/Kconfig
> +++ b/mm/Kconfig
> @@ -909,6 +909,12 @@ config ANON_VMA_NAME
>  	  area from being merged with adjacent virtual memory areas due to the
>  	  difference in their name.
>
> +config PTE_MARKER
> +	bool "Marker PTEs support"
> +
> +	help
> +	  Allows to create marker PTEs for file-backed memory.
> +
>  source "mm/damon/Kconfig"
>
>  endmenu

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ