linux-kernel - [PATCH] userfaultfd/shmem: fix MCOPY_ATOMIC

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives

Hash Suite: Windows password security audit tool. GUI, reports in PDF.

[<prev] [next>] [thread-next>] [day] [month] [year] [list]

Message-Id: <20210325231027.3402321-1-axelrasmussen@google.com>
Date:   Thu, 25 Mar 2021 16:10:27 -0700
From:   Axel Rasmussen <axelrasmussen@...gle.com>
To:     Alexander Viro <viro@...iv.linux.org.uk>,
        Andrea Arcangeli <aarcange@...hat.com>,
        Andrew Morton <akpm@...ux-foundation.org>,
        Hugh Dickins <hughd@...gle.com>,
        Jerome Glisse <jglisse@...hat.com>,
        Joe Perches <joe@...ches.com>,
        Lokesh Gidra <lokeshgidra@...gle.com>,
        Mike Rapoport <rppt@...ux.vnet.ibm.com>,
        Peter Xu <peterx@...hat.com>, Shaohua Li <shli@...com>,
        Shuah Khan <shuah@...nel.org>,
        Stephen Rothwell <sfr@...b.auug.org.au>,
        Wang Qing <wangqing@...o.com>
Cc:     linux-kernel@...r.kernel.org, linux-fsdevel@...r.kernel.org,
        linux-mm@...ck.org, linux-kselftest@...r.kernel.org,
        Axel Rasmussen <axelrasmussen@...gle.com>,
        Brian Geffon <bgeffon@...gle.com>,
        Cannon Matthews <cannonmatthews@...gle.com>,
        "Dr . David Alan Gilbert" <dgilbert@...hat.com>,
        David Rientjes <rientjes@...gle.com>,
        Michel Lespinasse <walken@...gle.com>,
        Mina Almasry <almasrymina@...gle.com>,
        Oliver Upton <oupton@...gle.com>
Subject: [PATCH] userfaultfd/shmem: fix MCOPY_ATOMIC_CONTNUE error handling + accounting

Previously, in the error path, we unconditionally removed the page from
the page cache. But in the continue case, we didn't add it - it was
already there because the page is used by a second (non-UFFD-registered)
mapping. So, in that case, it's incorrect to remove it as the other
mapping may still use it normally.

For this error handling failure, trivially exercise it in the
userfaultfd selftest, to detect this kind of bug in the future.

Also, we previously were unconditionally calling shmem_inode_acct_block.
In the continue case, however, this is incorrect, because we would have
already accounted for the RAM usage when the page was originally
allocated (since at this point it's already in the page cache). So,
doing it in the continue case causes us to double-count.

Fixes: 00da60b9d0a0 ("userfaultfd: support minor fault handling for shmem")
Signed-off-by: Axel Rasmussen <axelrasmussen@...gle.com>
---
 mm/shmem.c                               | 15 ++++++++++-----
 tools/testing/selftests/vm/userfaultfd.c | 12 ++++++++++++
 2 files changed, 22 insertions(+), 5 deletions(-)

diff --git a/mm/shmem.c b/mm/shmem.c
index d2e0e81b7d2e..5ac8ea737004 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2379,9 +2379,11 @@ int shmem_mcopy_atomic_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd,
 	int ret;
 	pgoff_t offset, max_off;
 
-	ret = -ENOMEM;
-	if (!shmem_inode_acct_block(inode, 1))
-		goto out;
+	if (!is_continue) {
+		ret = -ENOMEM;
+		if (!shmem_inode_acct_block(inode, 1))
+			goto out;
+	}
 
 	if (is_continue) {
 		ret = -EFAULT;
@@ -2389,6 +2391,7 @@ int shmem_mcopy_atomic_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd,
 		if (!page)
 			goto out_unacct_blocks;
 	} else if (!*pagep) {
+		ret = -ENOMEM;
 		page = shmem_alloc_page(gfp, info, pgoff);
 		if (!page)
 			goto out_unacct_blocks;
@@ -2486,12 +2489,14 @@ int shmem_mcopy_atomic_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd,
 out_release_unlock:
 	pte_unmap_unlock(dst_pte, ptl);
 	ClearPageDirty(page);
-	delete_from_page_cache(page);
+	if (!is_continue)
+		delete_from_page_cache(page);
 out_release:
 	unlock_page(page);
 	put_page(page);
 out_unacct_blocks:
-	shmem_inode_unacct_blocks(inode, 1);
+	if (!is_continue)
+		shmem_inode_unacct_blocks(inode, 1);
 	goto out;
 }
 #endif /* CONFIG_USERFAULTFD */
diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c
index f6c86b036d0f..d8541a59dae5 100644
--- a/tools/testing/selftests/vm/userfaultfd.c
+++ b/tools/testing/selftests/vm/userfaultfd.c
@@ -485,6 +485,7 @@ static void wp_range(int ufd, __u64 start, __u64 len, bool wp)
 static void continue_range(int ufd, __u64 start, __u64 len)
 {
 	struct uffdio_continue req;
+	int ret;
 
 	req.range.start = start;
 	req.range.len = len;
@@ -493,6 +494,17 @@ static void continue_range(int ufd, __u64 start, __u64 len)
 	if (ioctl(ufd, UFFDIO_CONTINUE, &req))
 		err("UFFDIO_CONTINUE failed for address 0x%" PRIx64,
 		    (uint64_t)start);
+
+	/*
+	 * Error handling within the kernel for continue is subtly different
+	 * from copy or zeropage, so it may be a source of bugs. Trigger an
+	 * error (-EEXIST) on purpose, to verify doing so doesn't cause a BUG.
+	 */
+	req.mapped = 0;
+	ret = ioctl(ufd, UFFDIO_CONTINUE, &req);
+	if (ret >= 0 || req.mapped != -EEXIST)
+		err("failed to exercise UFFDIO_CONTINUE error handling, ret=%d, mapped=%" PRId64,
+		    ret, req.mapped);
 }
 
 static void *locking_thread(void *arg)
-- 
2.31.0.291.g576ba9dcdaf-goog