lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <2a733e3ed673c3b9d6b1a5fcb6625953da042f42.1681176340.git.ackerleytng@google.com>
Date:   Tue, 11 Apr 2023 01:29:33 +0000
From:   Ackerley Tng <ackerleytng@...gle.com>
To:     kvm@...r.kernel.org, linux-api@...r.kernel.org,
        linux-arch@...r.kernel.org, linux-doc@...r.kernel.org,
        linux-fsdevel@...r.kernel.org, linux-kernel@...r.kernel.org,
        linux-mm@...ck.org, qemu-devel@...gnu.org
Cc:     aarcange@...hat.com, ak@...ux.intel.com, akpm@...ux-foundation.org,
        arnd@...db.de, bfields@...ldses.org, bp@...en8.de,
        chao.p.peng@...ux.intel.com, corbet@....net, dave.hansen@...el.com,
        david@...hat.com, ddutile@...hat.com, dhildenb@...hat.com,
        hpa@...or.com, hughd@...gle.com, jlayton@...nel.org,
        jmattson@...gle.com, joro@...tes.org, jun.nakajima@...el.com,
        kirill.shutemov@...ux.intel.com, linmiaohe@...wei.com,
        luto@...nel.org, mail@...iej.szmigiero.name, mhocko@...e.com,
        michael.roth@....com, mingo@...hat.com, naoya.horiguchi@....com,
        pbonzini@...hat.com, qperret@...gle.com, rppt@...nel.org,
        seanjc@...gle.com, shuah@...nel.org, steven.price@....com,
        tabba@...gle.com, tglx@...utronix.de, vannapurve@...gle.com,
        vbabka@...e.cz, vkuznets@...hat.com, wanpengli@...cent.com,
        wei.w.wang@...el.com, x86@...nel.org, yu.c.zhang@...ux.intel.com,
        Ackerley Tng <ackerleytng@...gle.com>
Subject: [RFC PATCH v4 2/2] selftests: restrictedmem: Check
 memfd_restricted()'s handling of provided userspace mount

For memfd_restricted() calls without a userspace mount, the backing
file should be created on the kernel's internal shmem mount, and the size
of backing pages should be as defined by the system-wide shmem configuration.

If a userspace mount is provided, the size of backing pages should be
as defined in the mount.

Also includes negative tests for invalid inputs, including fds
representing read-only superblocks/mounts.

Signed-off-by: Ackerley Tng <ackerleytng@...gle.com>
---
 tools/testing/selftests/mm/.gitignore         |   1 +
 tools/testing/selftests/mm/Makefile           |   1 +
 .../selftests/mm/memfd_restricted_usermnt.c   | 529 ++++++++++++++++++
 tools/testing/selftests/mm/run_vmtests.sh     |   3 +
 4 files changed, 534 insertions(+)
 create mode 100644 tools/testing/selftests/mm/memfd_restricted_usermnt.c

diff --git a/tools/testing/selftests/mm/.gitignore b/tools/testing/selftests/mm/.gitignore
index fb6e4233374d..dba320c8151a 100644
--- a/tools/testing/selftests/mm/.gitignore
+++ b/tools/testing/selftests/mm/.gitignore
@@ -31,6 +31,7 @@ map_fixed_noreplace
 write_to_hugetlbfs
 hmm-tests
 memfd_restricted
+memfd_restricted_usermnt
 memfd_secret
 soft-dirty
 split_huge_page_test
diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile
index 5ec338ea1fed..2f5df7a12ea5 100644
--- a/tools/testing/selftests/mm/Makefile
+++ b/tools/testing/selftests/mm/Makefile
@@ -46,6 +46,7 @@ TEST_GEN_FILES += map_fixed_noreplace
 TEST_GEN_FILES += map_hugetlb
 TEST_GEN_FILES += map_populate
 TEST_GEN_FILES += memfd_restricted
+TEST_GEN_FILES += memfd_restricted_usermnt
 TEST_GEN_FILES += memfd_secret
 TEST_GEN_FILES += migration
 TEST_GEN_FILES += mlock-random-test
diff --git a/tools/testing/selftests/mm/memfd_restricted_usermnt.c b/tools/testing/selftests/mm/memfd_restricted_usermnt.c
new file mode 100644
index 000000000000..0be04e3d714d
--- /dev/null
+++ b/tools/testing/selftests/mm/memfd_restricted_usermnt.c
@@ -0,0 +1,529 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#define _GNU_SOURCE /* for O_PATH */
+#define _POSIX_C_SOURCE /* for PATH_MAX */
+#include <limits.h>
+#include <sys/syscall.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+
+#include "linux/restrictedmem.h"
+
+#include "../kselftest_harness.h"
+
+static int memfd_restricted(unsigned int flags, int fd)
+{
+	return syscall(__NR_memfd_restricted, flags, fd); /* raw syscall by number; result passed through */
+}
+
+/*
+ * Parse the integer stored in the transparent_hugepage hpage_pmd_size sysfs
+ * file. Returns that value, or -1 if the file cannot be opened, read or
+ * parsed.
+ */
+static int get_hpage_pmd_size(void)
+{
+	char line[100];
+	int pmd_size = -1;
+	FILE *sysfs;
+
+	sysfs = fopen("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size", "r");
+	if (!sysfs)
+		return -1;
+
+	/* Parse only when fgets() actually filled the buffer */
+	if (fgets(line, 100, sysfs) == line) {
+		/* sscanf() may leave pmd_size untouched on a mismatch */
+		if (sscanf(line, "%d\n", &pmd_size) != 1)
+			pmd_size = -1;
+	}
+
+	fclose(sysfs);
+	return pmd_size;
+}
+
+/* Write @string to @path (opened with "w"). Returns 0 on success, -1 on error. */
+static int write_string_to_file(const char *path, const char *string)
+{
+	FILE *fp;
+	size_t len = strlen(string);
+	int ret = 0;
+
+	fp = fopen(path, "w");
+	if (!fp)
+		return -1;
+
+	if (fwrite(string, 1, len, fp) != len)
+		ret = -1;
+
+	/* stdio buffers the data, so a rejected write may only surface here */
+	if (fclose(fp) != 0)
+		ret = -1;
+	return ret;
+}
+
+/*
+ * Expect shmem thp policy to be one of always, within_size, advise, never,
+ * deny, force; the longest ("within_size") needs 11 chars plus a terminator
+ */
+#define POLICY_BUF_SIZE 12
+
+/* Return true iff @policy exactly matches a known shmem THP policy name. */
+static bool is_valid_shmem_thp_policy(char *policy)
+{
+	static const char * const known_policies[] = {
+		"always", "within_size", "advise", "never", "deny", "force",
+	};
+	const size_t count = sizeof(known_policies) / sizeof(known_policies[0]);
+	size_t idx;
+
+	for (idx = 0; idx < count; idx++) {
+		if (!strcmp(policy, known_policies[idx]))
+			return true;
+	}
+
+	/* No table entry matched */
+	return false;
+}
+
+/* Copy the active (bracketed) shmem_enabled policy into @policy; 0 on success, non-zero otherwise */
+static int get_shmem_thp_policy(char *policy)
+{
+	FILE *fp;
+	char buf[100], *left, *right;
+	size_t len;
+	int ret = -1;
+
+	fp = fopen("/sys/kernel/mm/transparent_hugepage/shmem_enabled", "r");
+	if (!fp)
+		return -1;
+
+	if (fgets(buf, sizeof(buf), fp) != buf)
+		goto out;
+
+	/*
+	 * Expect a line like "always within_size advise [never] deny force".
+	 * Scan only the string fgets() produced; the rest of buf is uninitialized.
+	 */
+	left = strchr(buf, '[');
+	right = left ? strchr(left, ']') : NULL;
+	if (!right)
+		goto out;
+
+	/* Callers pass a POLICY_BUF_SIZE-byte buffer: reject overlong names, then terminate */
+	len = right - left - 1;
+	if (len >= POLICY_BUF_SIZE)
+		goto out;
+	memcpy(policy, left + 1, len);
+	policy[len] = '\0';
+	ret = !is_valid_shmem_thp_policy(policy);
+out:
+	fclose(fp);
+	return ret;
+}
+
+/* Validate @policy and write it, newline-terminated, to shmem_enabled. */
+static int set_shmem_thp_policy(char *policy)
+{
+	/* +1 for newline */
+	char line[POLICY_BUF_SIZE + 1] = { 0 };
+	int written;
+
+	if (!is_valid_shmem_thp_policy(policy))
+		return -1;
+
+	written = snprintf(line, POLICY_BUF_SIZE + 1, "%s\n", policy);
+	if (written != strlen(policy) + 1)
+		return -1;
+
+	/* Hand the helper's result straight back to the caller */
+	return write_string_to_file(
+		"/sys/kernel/mm/transparent_hugepage/shmem_enabled", line);
+}
+
+FIXTURE(reset_shmem_enabled)
+{
+	char shmem_enabled[POLICY_BUF_SIZE]; /* policy read at setup, written back at teardown */
+};
+
+FIXTURE_SETUP(reset_shmem_enabled)
+{
+	memset(self->shmem_enabled, 0, POLICY_BUF_SIZE); /* zero the buffer before it is filled */
+	ASSERT_EQ(get_shmem_thp_policy(self->shmem_enabled), 0);
+}
+
+FIXTURE_TEARDOWN(reset_shmem_enabled)
+{
+	ASSERT_EQ(set_shmem_thp_policy(self->shmem_enabled), 0); /* put the saved policy back */
+}
+
+TEST_F(reset_shmem_enabled, restrictedmem_fstat_shmem_enabled_never)
+{
+	int fd = -1;
+	struct stat stat;
+
+	ASSERT_EQ(set_shmem_thp_policy("never"), 0); /* fixture teardown writes the saved policy back */
+
+	fd = memfd_restricted(0, -1); /* flags == 0: no userspace mount is requested */
+	ASSERT_GT(fd, 0);
+
+	ASSERT_EQ(fstat(fd, &stat), 0);
+
+	/*
+	 * st_blksize is set based on the superblock's s_blocksize_bits. For
+	 * shmem, this is set to PAGE_SHIFT
+	 */
+	ASSERT_EQ(stat.st_blksize, getpagesize());
+
+	close(fd);
+}
+
+TEST_F(reset_shmem_enabled, restrictedmem_fstat_shmem_enabled_always)
+{
+	int fd = -1;
+	struct stat stat;
+
+	ASSERT_EQ(set_shmem_thp_policy("always"), 0); /* fixture teardown writes the saved policy back */
+
+	fd = memfd_restricted(0, -1); /* flags == 0: no userspace mount is requested */
+	ASSERT_GT(fd, 0);
+
+	ASSERT_EQ(fstat(fd, &stat), 0);
+
+	ASSERT_EQ(stat.st_blksize, get_hpage_pmd_size()); /* block size must equal the hpage_pmd_size sysfs value */
+
+	close(fd);
+}
+
+TEST(restrictedmem_tmpfile_invalid_fd)
+{
+	int fd = memfd_restricted(MEMFD_RSTD_USERMNT, -2); /* negative value: not an open descriptor */
+
+	ASSERT_EQ(fd, -1);
+	ASSERT_EQ(errno, EBADF);
+}
+
+TEST(restrictedmem_tmpfile_fd_not_a_mount)
+{
+	int fd = memfd_restricted(MEMFD_RSTD_USERMNT, STDOUT_FILENO); /* stdout is open but is passed where a mount fd is expected */
+
+	ASSERT_EQ(fd, -1);
+	ASSERT_EQ(errno, EINVAL);
+}
+
+/* A non-tmpfs mount (here /proc) must be rejected with EINVAL */
+TEST(restrictedmem_tmpfile_not_tmpfs_mount)
+{
+	int fd = -1;
+	int mfd = open("/proc", O_PATH);
+
+	ASSERT_NE(mfd, -1);
+	fd = memfd_restricted(MEMFD_RSTD_USERMNT, mfd);
+	ASSERT_EQ(fd, -1);
+	ASSERT_EQ(errno, EINVAL);
+
+	ASSERT_EQ(close(mfd), 0); /* don't leak the O_PATH descriptor */
+}
+
+FIXTURE(tmpfs_sfd)
+{
+	int sfd; /* descriptor returned by fsopen("tmpfs", 0) in setup */
+};
+
+FIXTURE_SETUP(tmpfs_sfd)
+{
+	self->sfd = fsopen("tmpfs", 0); /* each test configures and mounts through this fd */
+	ASSERT_NE(self->sfd, -1);
+}
+
+FIXTURE_TEARDOWN(tmpfs_sfd)
+{
+	EXPECT_EQ(close(self->sfd), 0); /* release the fsopen() descriptor */
+}
+
+/* On a huge=always tmpfs mount, st_blksize must equal hpage_pmd_size */
+TEST_F(tmpfs_sfd, restrictedmem_fstat_tmpfs_huge_always)
+{
+	int ret = -1, fd = -1, mfd = -1;
+	struct stat stat;
+
+	/* Fail here, not at the st_blksize check, if the option is refused */
+	ASSERT_EQ(fsconfig(self->sfd, FSCONFIG_SET_STRING, "huge", "always", 0), 0);
+	ASSERT_EQ(fsconfig(self->sfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);
+
+	mfd = fsmount(self->sfd, 0, 0);
+	ASSERT_NE(mfd, -1);
+
+	fd = memfd_restricted(MEMFD_RSTD_USERMNT, mfd);
+	ASSERT_GT(fd, 0);
+
+	/* User can close reference to mount */
+	ret = close(mfd);
+	ASSERT_EQ(ret, 0);
+
+	ret = fstat(fd, &stat);
+	ASSERT_EQ(ret, 0);
+	ASSERT_EQ(stat.st_blksize, get_hpage_pmd_size());
+
+	close(fd);
+}
+
+/* On a huge=never tmpfs mount, st_blksize must equal the base page size */
+TEST_F(tmpfs_sfd, restrictedmem_fstat_tmpfs_huge_never)
+{
+	int ret = -1, fd = -1, mfd = -1;
+	struct stat stat;
+
+	/* Fail here, not at the st_blksize check, if the option is refused */
+	ASSERT_EQ(fsconfig(self->sfd, FSCONFIG_SET_STRING, "huge", "never", 0), 0);
+	ASSERT_EQ(fsconfig(self->sfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);
+
+	mfd = fsmount(self->sfd, 0, 0);
+	ASSERT_NE(mfd, -1);
+
+	fd = memfd_restricted(MEMFD_RSTD_USERMNT, mfd);
+	ASSERT_GT(fd, 0);
+
+	/* User can close reference to mount */
+	ret = close(mfd);
+	ASSERT_EQ(ret, 0);
+
+	ret = fstat(fd, &stat);
+	ASSERT_EQ(ret, 0);
+	ASSERT_EQ(stat.st_blksize, getpagesize());
+
+	close(fd);
+}
+
+/* A mount created with MOUNT_ATTR_RDONLY must be refused with EROFS */
+TEST_F(tmpfs_sfd, restrictedmem_check_mount_flags)
+{
+	int ret = -1, fd = -1, mfd = -1;
+
+	/* Report a failed superblock creation here rather than at fsmount() */
+	ASSERT_EQ(fsconfig(self->sfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);
+
+	mfd = fsmount(self->sfd, 0, MOUNT_ATTR_RDONLY);
+	ASSERT_NE(mfd, -1);
+
+	fd = memfd_restricted(MEMFD_RSTD_USERMNT, mfd);
+	ASSERT_EQ(fd, -1);
+	ASSERT_EQ(errno, EROFS);
+
+	ret = close(mfd);
+	ASSERT_EQ(ret, 0);
+}
+
+/* A tmpfs superblock configured with the "ro" flag must be refused with EROFS */
+TEST_F(tmpfs_sfd, restrictedmem_check_superblock_flags)
+{
+	int ret = -1, fd = -1, mfd = -1;
+
+	/* If "ro" is silently rejected, the EROFS checks below test nothing useful */
+	ASSERT_EQ(fsconfig(self->sfd, FSCONFIG_SET_FLAG, "ro", NULL, 0), 0);
+	ASSERT_EQ(fsconfig(self->sfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);
+
+	mfd = fsmount(self->sfd, 0, 0);
+	ASSERT_NE(mfd, -1);
+
+	fd = memfd_restricted(MEMFD_RSTD_USERMNT, mfd);
+	ASSERT_EQ(fd, -1);
+	ASSERT_EQ(errno, EROFS);
+
+	ret = close(mfd);
+	ASSERT_EQ(ret, 0);
+}
+
+static bool directory_exists(const char *path)
+{
+	struct stat info; /* only st_mode is inspected */
+
+	/* A failed stat() means "no such directory"; otherwise test the type */
+	return !stat(path, &info) ? S_ISDIR(info.st_mode) : false;
+}
+
+FIXTURE(restrictedmem_test_mount_path)
+{
+	char *mount_path; /* directory the tests mount tmpfs on */
+};
+
+FIXTURE_SETUP(restrictedmem_test_mount_path)
+{
+	int ret = -1;
+
+	/* /tmp is an FHS-mandated world-writable directory */
+	self->mount_path = "/tmp/restrictedmem-selftest-mnt";
+
+	if (!directory_exists(self->mount_path)) { /* an existing directory is reused as is */
+		ret = mkdir(self->mount_path, 0777);
+		ASSERT_EQ(ret, 0);
+	}
+}
+
+FIXTURE_TEARDOWN(restrictedmem_test_mount_path)
+{
+	int ret = -1;
+
+	if (!directory_exists(self->mount_path)) /* nothing to unmount or remove */
+		return;
+
+	ret = umount2(self->mount_path, MNT_FORCE);
+	EXPECT_EQ(ret, 0);
+	if (ret == -1 && errno == EINVAL) /* extra hint for the failed EXPECT above */
+		fprintf(stderr, "  %s was not mounted\n", self->mount_path);
+
+	ret = rmdir(self->mount_path);
+	EXPECT_EQ(ret, 0);
+	if (ret == -1) /* %m prints the errno text */
+		fprintf(stderr, "  rmdir(%s) failed: %m\n", self->mount_path);
+}
+
+/*
+ * memfd_restricted() syscall can only be used with the fd of the root of the
+ * mount. When the restrictedmem's fd is open, a user should not be able to
+ * unmount or remove the mounted directory
+ */
+TEST_F(restrictedmem_test_mount_path, restrictedmem_umount_rmdir_while_file_open)
+{
+	int ret = -1;
+	int fd = -1;
+	int mfd = -1;
+	struct stat stat;
+
+	ret = mount("name", self->mount_path, "tmpfs", 0, "huge=always");
+	ASSERT_EQ(ret, 0);
+
+	mfd = open(self->mount_path, O_PATH); /* fd for the mount's root directory */
+	ASSERT_NE(mfd, -1);
+
+	fd = memfd_restricted(MEMFD_RSTD_USERMNT, mfd);
+	ASSERT_GT(fd, 0);
+
+	/* We don't need this reference to the mount anymore */
+	ret = close(mfd);
+	ASSERT_EQ(ret, 0);
+
+	/* restrictedmem's fd should still be usable */
+	ret = fstat(fd, &stat);
+	ASSERT_EQ(ret, 0);
+	ASSERT_EQ(stat.st_blksize, get_hpage_pmd_size());
+
+	/* User should not be able to unmount directory */
+	ret = umount2(self->mount_path, MNT_FORCE);
+	ASSERT_EQ(ret, -1);
+	ASSERT_EQ(errno, EBUSY);
+
+	ret = rmdir(self->mount_path); /* removing the mount point must fail as well */
+	ASSERT_EQ(ret, -1);
+	ASSERT_EQ(errno, EBUSY);
+
+	close(fd); /* the fixture teardown then unmounts and removes the directory */
+}
+
+/* The fd of a file on the mount cannot be provided as mount_fd */
+TEST_F(restrictedmem_test_mount_path, restrictedmem_provide_fd_of_file)
+{
+	int ret = -1;
+	int fd = -1;
+	int ffd = -1;
+	char tmp_file_path[PATH_MAX] = { 0 };
+
+	ret = mount("name", self->mount_path, "tmpfs", 0, "huge=always");
+	ASSERT_EQ(ret, 0);
+
+	snprintf(tmp_file_path, PATH_MAX, "%s/tmp-file", self->mount_path);
+	ret = write_string_to_file(tmp_file_path, "filler\n");
+	ASSERT_EQ(ret, 0);
+
+	ffd = open(tmp_file_path, O_RDWR);
+	ASSERT_NE(ffd, -1);
+
+	fd = memfd_restricted(MEMFD_RSTD_USERMNT, ffd);
+	ASSERT_LT(fd, 0);
+	ASSERT_EQ(errno, EINVAL);
+
+	ret = close(ffd);
+	ASSERT_EQ(ret, 0);
+
+	/* fd is known to be negative here, so there is nothing to close */
+	remove(tmp_file_path);
+}
+
+/* The fd of files on the mount cannot be provided as mount_fd */
+TEST_F(restrictedmem_test_mount_path, restrictedmem_provide_fd_of_file_in_subdir)
+{
+	int ret = -1;
+	int fd = -1;
+	int ffd = -1;
+	char tmp_dir_path[PATH_MAX] = { 0 };
+	char tmp_file_path[PATH_MAX] = { 0 };
+
+	ret = mount("name", self->mount_path, "tmpfs", 0, "huge=always");
+	ASSERT_EQ(ret, 0);
+
+	snprintf(tmp_dir_path, PATH_MAX, "%s/tmp-subdir", self->mount_path);
+	ret = mkdir(tmp_dir_path, 0777);
+	ASSERT_EQ(ret, 0);
+
+	snprintf(tmp_file_path, PATH_MAX, "%s/tmp-subdir/tmp-file",
+		 self->mount_path);
+	ret = write_string_to_file(tmp_file_path, "filler\n"); /* creates the file inside the subdirectory */
+	ASSERT_EQ(ret, 0);
+
+	ffd = open(tmp_file_path, O_RDWR);
+	ASSERT_NE(ffd, -1);
+
+	fd = memfd_restricted(MEMFD_RSTD_USERMNT, ffd); /* a regular file's fd, not the mount root */
+	ASSERT_LT(fd, 0);
+	ASSERT_EQ(errno, EINVAL);
+
+	ret = close(ffd);
+	ASSERT_EQ(ret, 0);
+
+	remove(tmp_file_path); /* best-effort cleanup; results are not checked */
+	rmdir(tmp_dir_path);
+}
+
+/*
+ * fds representing bind mounts must represent the root of the original
+ * filesystem
+ */
+TEST_F(restrictedmem_test_mount_path, restrictedmem_provide_fd_of_original_fs)
+{
+	int ret = -1;
+	int fd = -1;
+	int mfd = -1;
+	char tmp_dir_path_0[PATH_MAX] = { 0 };
+	char tmp_dir_path_1[PATH_MAX] = { 0 };
+
+	ret = mount("name", self->mount_path, "tmpfs", 0, "huge=always");
+	ASSERT_EQ(ret, 0);
+
+	snprintf(tmp_dir_path_0, PATH_MAX, "%s/tmp-subdir-0", self->mount_path);
+	ret = mkdir(tmp_dir_path_0, 0777);
+	ASSERT_EQ(ret, 0);
+
+	snprintf(tmp_dir_path_1, PATH_MAX, "%s/tmp-subdir-1", self->mount_path);
+	ret = mkdir(tmp_dir_path_1, 0777);
+	ASSERT_EQ(ret, 0);
+
+	ret = mount(tmp_dir_path_0, tmp_dir_path_1, "tmpfs", MS_BIND, NULL); /* bind-mount subdir-0 onto subdir-1 */
+	ASSERT_EQ(ret, 0);
+
+	mfd = open(tmp_dir_path_1, O_PATH); /* fd for the bind mount, whose source is a subdirectory */
+	ASSERT_NE(mfd, -1);
+
+	fd = memfd_restricted(MEMFD_RSTD_USERMNT, mfd);
+	ASSERT_LT(fd, 0);
+	ASSERT_EQ(errno, EINVAL);
+
+	ret = close(mfd);
+	ASSERT_EQ(ret, 0);
+
+	ret = umount2(tmp_dir_path_1, MNT_FORCE); /* drop the bind mount before removing the directories */
+	ASSERT_EQ(ret, 0);
+
+	rmdir(tmp_dir_path_0); /* best-effort cleanup; results are not checked */
+	rmdir(tmp_dir_path_1);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh
index 53de84e3ec2c..04238f86f037 100644
--- a/tools/testing/selftests/mm/run_vmtests.sh
+++ b/tools/testing/selftests/mm/run_vmtests.sh
@@ -40,6 +40,8 @@ separated by spaces:
 	test memadvise(2) MADV_POPULATE_{READ,WRITE} options
 - memfd_restricted_
 	test memfd_restricted(2)
+- memfd_restricted_usermnt
+	test memfd_restricted(2)'s handling of provided userspace mounts
 - memfd_secret
 	test memfd_secret(2)
 - process_mrelease
@@ -239,6 +241,7 @@ CATEGORY="hmm" run_test ./test_hmm.sh smoke
 CATEGORY="madv_populate" run_test ./madv_populate
 
 CATEGORY="memfd_restricted" run_test ./memfd_restricted
+CATEGORY="memfd_restricted_usermnt" run_test ./memfd_restricted_usermnt
 
 CATEGORY="memfd_secret" run_test ./memfd_secret
 
-- 
2.40.0.577.gac1e443424-goog

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ