lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <04697A35-AEC7-43F1-8462-1CD39648544A@nvidia.com>
Date:   Tue, 09 Mar 2021 14:52:40 -0500
From:   "Zi Yan" <ziy@...dia.com>
To:     "Axel Rasmussen" <axelrasmussen@...gle.com>
Cc:     "Alexander Viro" <viro@...iv.linux.org.uk>,
        "Andrea Arcangeli" <aarcange@...hat.com>,
        "Andrew Morton" <akpm@...ux-foundation.org>,
        "Hugh Dickins" <hughd@...gle.com>,
        "Jerome Glisse" <jglisse@...hat.com>,
        "Joe Perches" <joe@...ches.com>,
        "Lokesh Gidra" <lokeshgidra@...gle.com>,
        "Mike Rapoport" <rppt@...ux.vnet.ibm.com>,
        "Peter Xu" <peterx@...hat.com>, "Shaohua Li" <shli@...com>,
        "Shuah Khan" <shuah@...nel.org>, "Wang Qing" <wangqing@...o.com>,
        linux-kernel@...r.kernel.org, linux-fsdevel@...r.kernel.org,
        linux-mm@...ck.org, linux-kselftest@...r.kernel.org,
        "Brian Geffon" <bgeffon@...gle.com>,
        "Cannon Matthews" <cannonmatthews@...gle.com>,
        "Dr . David Alan Gilbert" <dgilbert@...hat.com>,
        "David Rientjes" <rientjes@...gle.com>,
        "Michel Lespinasse" <walken@...gle.com>,
        "Mina Almasry" <almasrymina@...gle.com>,
        "Oliver Upton" <oupton@...gle.com>
Subject: Re: [PATCH v2 1/5] userfaultfd: support minor fault handling for
 shmem

On 1 Mar 2021, at 19:01, Axel Rasmussen wrote:

> Modify the userfaultfd register API to allow registering shmem VMAs in
> minor mode. Modify the shmem mcopy implementation to support
> UFFDIO_CONTINUE in order to resolve such faults.
>
> Combine the shmem mcopy handler functions into a single
> shmem_mcopy_atomic_pte, which takes a mode parameter. This matches how
> the hugetlbfs implementation is structured, and lets us remove a good
> chunk of boilerplate.
>
> Signed-off-by: Axel Rasmussen <axelrasmussen@...gle.com>
> ---
>  fs/userfaultfd.c                 |  6 +--
>  include/linux/shmem_fs.h         | 26 ++++-----
>  include/uapi/linux/userfaultfd.h |  4 +-
>  mm/memory.c                      |  8 +--
>  mm/shmem.c                       | 92 +++++++++++++++-----------------
>  mm/userfaultfd.c                 | 27 +++++-----
>  6 files changed, 79 insertions(+), 84 deletions(-)
>
> diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
> index 14f92285d04f..9f3b8684cf3c 100644
> --- a/fs/userfaultfd.c
> +++ b/fs/userfaultfd.c
> @@ -1267,8 +1267,7 @@ static inline bool vma_can_userfault(struct vm_area_struct *vma,
>  	}
>
>  	if (vm_flags & VM_UFFD_MINOR) {
> -		/* FIXME: Add minor fault interception for shmem. */
> -		if (!is_vm_hugetlb_page(vma))
> +		if (!(is_vm_hugetlb_page(vma) || vma_is_shmem(vma)))
>  			return false;
>  	}
>
> @@ -1941,7 +1940,8 @@ static int userfaultfd_api(struct userfaultfd_ctx *ctx,
>  	/* report all available features and ioctls to userland */
>  	uffdio_api.features = UFFD_API_FEATURES;
>  #ifndef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR
> -	uffdio_api.features &= ~UFFD_FEATURE_MINOR_HUGETLBFS;
> +	uffdio_api.features &=
> +		~(UFFD_FEATURE_MINOR_HUGETLBFS | UFFD_FEATURE_MINOR_SHMEM);
>  #endif
>  	uffdio_api.ioctls = UFFD_API_IOCTLS;
>  	ret = -EFAULT;
> diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
> index d82b6f396588..f0919c3722e7 100644
> --- a/include/linux/shmem_fs.h
> +++ b/include/linux/shmem_fs.h
> @@ -9,6 +9,7 @@
>  #include <linux/percpu_counter.h>
>  #include <linux/xattr.h>
>  #include <linux/fs_parser.h>
> +#include <linux/userfaultfd_k.h>
>
>  /* inode in-kernel data */
>
> @@ -122,21 +123,16 @@ static inline bool shmem_file(struct file *file)
>  extern bool shmem_charge(struct inode *inode, long pages);
>  extern void shmem_uncharge(struct inode *inode, long pages);
>
> +#ifdef CONFIG_USERFAULTFD
>  #ifdef CONFIG_SHMEM
> -extern int shmem_mcopy_atomic_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd,
> -				  struct vm_area_struct *dst_vma,
> -				  unsigned long dst_addr,
> -				  unsigned long src_addr,
> -				  struct page **pagep);
> -extern int shmem_mfill_zeropage_pte(struct mm_struct *dst_mm,
> -				    pmd_t *dst_pmd,
> -				    struct vm_area_struct *dst_vma,
> -				    unsigned long dst_addr);
> -#else
> -#define shmem_mcopy_atomic_pte(dst_mm, dst_pte, dst_vma, dst_addr, \
> -			       src_addr, pagep)        ({ BUG(); 0; })
> -#define shmem_mfill_zeropage_pte(dst_mm, dst_pmd, dst_vma, \
> -				 dst_addr)      ({ BUG(); 0; })
> -#endif
> +int shmem_mcopy_atomic_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd,
> +			   struct vm_area_struct *dst_vma,
> +			   unsigned long dst_addr, unsigned long src_addr,
> +			   enum mcopy_atomic_mode mode, struct page **pagep);
> +#else /* !CONFIG_SHMEM */
> +#define shmem_mcopy_atomic_pte(dst_mm, dst_pmd, dst_vma, dst_addr, \
> +			       src_addr, mode, pagep)        ({ BUG(); 0; })
> +#endif /* CONFIG_SHMEM */
> +#endif /* CONFIG_USERFAULTFD */
>
>  #endif
> diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h
> index bafbeb1a2624..47d9790d863d 100644
> --- a/include/uapi/linux/userfaultfd.h
> +++ b/include/uapi/linux/userfaultfd.h
> @@ -31,7 +31,8 @@
>  			   UFFD_FEATURE_MISSING_SHMEM |		\
>  			   UFFD_FEATURE_SIGBUS |		\
>  			   UFFD_FEATURE_THREAD_ID |		\
> -			   UFFD_FEATURE_MINOR_HUGETLBFS)
> +			   UFFD_FEATURE_MINOR_HUGETLBFS |	\
> +			   UFFD_FEATURE_MINOR_SHMEM)
>  #define UFFD_API_IOCTLS				\
>  	((__u64)1 << _UFFDIO_REGISTER |		\
>  	 (__u64)1 << _UFFDIO_UNREGISTER |	\
> @@ -196,6 +197,7 @@ struct uffdio_api {
>  #define UFFD_FEATURE_SIGBUS			(1<<7)
>  #define UFFD_FEATURE_THREAD_ID			(1<<8)
>  #define UFFD_FEATURE_MINOR_HUGETLBFS		(1<<9)
> +#define UFFD_FEATURE_MINOR_SHMEM		(1<<10)
>  	__u64 features;
>
>  	__u64 ioctls;
> diff --git a/mm/memory.c b/mm/memory.c
> index c8e357627318..a1e5ff55027e 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -3929,9 +3929,11 @@ static vm_fault_t do_read_fault(struct vm_fault *vmf)
>  	 * something).
>  	 */
>  	if (vma->vm_ops->map_pages && fault_around_bytes >> PAGE_SHIFT > 1) {
> -		ret = do_fault_around(vmf);
> -		if (ret)
> -			return ret;
> +		if (likely(!userfaultfd_minor(vmf->vma))) {
> +			ret = do_fault_around(vmf);
> +			if (ret)
> +				return ret;
> +		}
>  	}
>
>  	ret = __do_fault(vmf);
> diff --git a/mm/shmem.c b/mm/shmem.c
> index b2db4ed0fbc7..6f81259fabb3 100644
> --- a/mm/shmem.c
> +++ b/mm/shmem.c
> @@ -77,7 +77,6 @@ static struct vfsmount *shm_mnt;
>  #include <linux/syscalls.h>
>  #include <linux/fcntl.h>
>  #include <uapi/linux/memfd.h>
> -#include <linux/userfaultfd_k.h>
>  #include <linux/rmap.h>
>  #include <linux/uuid.h>
>
> @@ -1785,8 +1784,8 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index,
>   * vm. If we swap it in we mark it dirty since we also free the swap
>   * entry since a page cannot live in both the swap and page cache.
>   *
> - * vmf and fault_type are only supplied by shmem_fault:
> - * otherwise they are NULL.
> + * vma, vmf, and fault_type are only supplied by shmem_fault: otherwise they
> + * are NULL.
>   */
>  static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
>  	struct page **pagep, enum sgp_type sgp, gfp_t gfp,
> @@ -1830,6 +1829,12 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
>  		return error;
>  	}
>
> +	if (page && vma && userfaultfd_minor(vma)) {
> +		unlock_page(page);
> +		*fault_type = handle_userfault(vmf, VM_UFFD_MINOR);
> +		return 0;
> +	}
> +
>  	if (page)
>  		hindex = page->index;
>  	if (page && sgp == SGP_WRITE)
> @@ -2354,14 +2359,12 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode
>  	return inode;
>  }
>
> -static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,
> -				  pmd_t *dst_pmd,
> -				  struct vm_area_struct *dst_vma,
> -				  unsigned long dst_addr,
> -				  unsigned long src_addr,
> -				  bool zeropage,
> -				  struct page **pagep)
> +int shmem_mcopy_atomic_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd,
> +			   struct vm_area_struct *dst_vma,
> +			   unsigned long dst_addr, unsigned long src_addr,
> +			   enum mcopy_atomic_mode mode, struct page **pagep)
>  {
> +	bool is_continue = (mode == MCOPY_ATOMIC_CONTINUE);
>  	struct inode *inode = file_inode(dst_vma->vm_file);
>  	struct shmem_inode_info *info = SHMEM_I(inode);
>  	struct address_space *mapping = inode->i_mapping;
> @@ -2378,12 +2381,17 @@ static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,
>  	if (!shmem_inode_acct_block(inode, 1))
>  		goto out;
>
> -	if (!*pagep) {
> +	if (is_continue) {
> +		ret = -EFAULT;
> +		page = find_lock_page(mapping, pgoff);
> +		if (!page)
> +			goto out_unacct_blocks;
> +	} else if (!*pagep) {
>  		page = shmem_alloc_page(gfp, info, pgoff);
>  		if (!page)
>  			goto out_unacct_blocks;
>
> -		if (!zeropage) {	/* mcopy_atomic */
> +		if (mode == MCOPY_ATOMIC_NORMAL) {	/* mcopy_atomic */
>  			page_kaddr = kmap_atomic(page);
>  			ret = copy_from_user(page_kaddr,
>  					     (const void __user *)src_addr,

Hi Axel,

shmem_mcopy_atomic_pte is not guarded by CONFIG_USERFAULTFD, thus it is
causing compilation errors due to the use of enum mcopy_atomic_mode mode,
when CONFIG_USERFAULTFD is not set.


—
Best Regards,
Yan Zi

Download attachment "signature.asc" of type "application/pgp-signature" (855 bytes)

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ