lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Fri, 28 May 2021 10:07:02 -0700
From:   "Lin, Ming" <minggr@...il.com>
To:     Linus Torvalds <torvalds@...ux-foundation.org>,
        Simon Ser <contact@...rsion.fr>
Cc:     Peter Xu <peterx@...hat.com>,
        "Kirill A. Shutemov" <kirill@...temov.name>,
        Matthew Wilcox <willy@...radead.org>,
        Dan Williams <dan.j.williams@...el.com>,
        "Kirill A. Shutemov" <kirill.shutemov@...ux.intel.com>,
        Will Deacon <will@...nel.org>,
        Linux Kernel Mailing List <linux-kernel@...r.kernel.org>,
        David Herrmann <dh.herrmann@...il.com>,
        "linux-mm@...ck.org" <linux-mm@...ck.org>,
        Greg Kroah-Hartman <greg@...ah.com>,
        "tytso@....edu" <tytso@....edu>
Subject: Re: Sealed memfd & no-fault mmap

On 5/5/2021 11:42 AM, Linus Torvalds wrote:
> On Wed, May 5, 2021 at 3:21 AM Simon Ser <contact@...rsion.fr> wrote:
>>>
>>> Is there some very specific and targeted pattern for that "shared
>>> mapping" case? For example, if it's always a shared anonymous mapping
>>> with no filesystem backing, then that would possibly be a simpler case
>>> than the "random arbitrary shared file descriptor".
>>
>> Yes. I don't know of any Wayland client using buffers with real
>> filesystem backing. I think the main cases are:
>>
>> - shm_open(3) immediately followed by shm_unlink(3). On Linux, this is
>>    implemented with /dev/shm which is a tmpfs.
>> - Abusing /tmp or /run's tmpfs by creating a file there and unlinking
>>    it immediately afterwards. Kind of similar to the first case.
>> - memfd_create(2) on Linux.
>>
>> Is this enough to make it work on shared memory mappings? Is it
>> important that the mapping is anonymous?
> 
> All of those should be anonymous in the sense that the backing store
> is all the kernel's notion of anonymous pages, and there is no actual
> file backing. The mappings may then be shared, of course.
> 
> So that does make Peter's idea to have some inode flag for "don't
> SIGBUS on fault" be more reasonable, because there isn't some random
> actual filesystem involved, only the core VM layer.
> 
> I'm not going to write the patch, though, but maybe you can convince
> somebody else to try it..

Does something like following draft patch on the right track?

1. Application set S_NOFAULT flag on shm mmap fd

	#define S_NOFAULT       (1 << 17)
         fd = shm_open(shmpath, O_RDONLY, S_IRUSR | S_IWUSR);
         ioctl(fd, FS_IOC_GETFLAGS, &flags);
         flags |= S_NOFAULT;
         ioctl(fd, FS_IOC_SETFLAGS, &flags)

2. Don't SIGBUS on read beyond i_size if S_NOFAULT flag set in inode.
    Use zero page instead.

---

[RFC DRAFT PATCH] shm: no SIGBUS fault on out-of-band mmap read
---
  include/linux/fs.h |  2 ++
  mm/shmem.c         | 44 +++++++++++++++++++++++++++++++++++++++++++-
  2 files changed, 45 insertions(+), 1 deletion(-)

diff --git a/include/linux/fs.h b/include/linux/fs.h
index c3c88fdb9b2a..a9be7cd71b94 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2202,6 +2202,7 @@ struct super_operations {
  #define S_ENCRYPTED	(1 << 14) /* Encrypted file (using fs/crypto/) */
  #define S_CASEFOLD	(1 << 15) /* Casefolded file */
  #define S_VERITY	(1 << 16) /* Verity file (using fs/verity/) */
+#define S_NOFAULT	(1 << 17) /* No SIGBUS fault on out-of-band mmap read */
  
  /*
   * Note that nosuid etc flags are inode-specific: setting some file-system
@@ -2244,6 +2245,7 @@ static inline bool sb_rdonly(const struct super_block *sb) { return sb->s_flags
  #define IS_ENCRYPTED(inode)	((inode)->i_flags & S_ENCRYPTED)
  #define IS_CASEFOLDED(inode)	((inode)->i_flags & S_CASEFOLD)
  #define IS_VERITY(inode)	((inode)->i_flags & S_VERITY)
+#define IS_NOFAULT(inode)	((inode)->i_flags & S_NOFAULT)
  
  #define IS_WHITEOUT(inode)	(S_ISCHR(inode->i_mode) && \
  				 (inode)->i_rdev == WHITEOUT_DEV)
diff --git a/mm/shmem.c b/mm/shmem.c
index 5d46611cba8d..856d2d8d4cdf 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -38,8 +38,11 @@
  #include <linux/hugetlb.h>
  #include <linux/frontswap.h>
  #include <linux/fs_parser.h>
+#include <linux/fs.h>
+#include <linux/fileattr.h>
  
  #include <asm/tlbflush.h> /* for arch/microblaze update_mmu_cache() */
+#include <asm/pgalloc.h>
  
  static struct vfsmount *shm_mnt;
  
@@ -1812,7 +1815,27 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
  repeat:
  	if (sgp <= SGP_CACHE &&
  	    ((loff_t)index << PAGE_SHIFT) >= i_size_read(inode)) {
-		return -EINVAL;
+		unsigned long dst_addr = vmf->address;
+		pte_t _dst_pte, *dst_pte;
+		spinlock_t *ptl;
+		int ret;
+
+		if (!IS_NOFAULT(inode))
+			return -EINVAL;
+
+		_dst_pte = pte_mkspecial(pfn_pte(my_zero_pfn(dst_addr),
+					 vma->vm_page_prot));
+		dst_pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, dst_addr, &ptl);
+		ret = -EEXIST;
+		if (!pte_none(*dst_pte))
+			goto out_unlock;
+		set_pte_at(vma->vm_mm, dst_addr, dst_pte, _dst_pte);
+		update_mmu_cache(vma, dst_addr, dst_pte);
+		*fault_type = VM_FAULT_NOPAGE;
+		ret = 0;
+out_unlock:
+		pte_unmap_unlock(dst_pte, ptl);
+		return ret;
  	}
  
  	sbinfo = SHMEM_SB(inode->i_sb);
@@ -3819,6 +3842,23 @@ const struct address_space_operations shmem_aops = {
  };
  EXPORT_SYMBOL(shmem_aops);
  
+static int shmem_fileattr_get(struct dentry *dentry, struct fileattr *fa)
+{
+	struct inode *inode = d_inode(dentry);
+
+	fileattr_fill_flags(fa, inode->i_flags);
+
+	return 0;
+}
+
+static int shmem_fileattr_set(struct user_namespace *mnt_userns,
+			      struct dentry *dentry, struct fileattr *fa)
+{
+	struct inode *inode = d_inode(dentry);
+	inode->i_flags = fa->flags;
+	return 0;
+}
+
  static const struct file_operations shmem_file_operations = {
  	.mmap		= shmem_mmap,
  	.get_unmapped_area = shmem_get_unmapped_area,
@@ -3836,6 +3876,8 @@ static const struct file_operations shmem_file_operations = {
  static const struct inode_operations shmem_inode_operations = {
  	.getattr	= shmem_getattr,
  	.setattr	= shmem_setattr,
+	.fileattr_get	= shmem_fileattr_get,
+	.fileattr_set	= shmem_fileattr_set,
  #ifdef CONFIG_TMPFS_XATTR
  	.listxattr	= shmem_listxattr,
  	.set_acl	= simple_set_acl,

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ