lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Date:   Sat,  9 Jan 2021 19:44:34 -0500
From:   Andrea Arcangeli <aarcange@...hat.com>
To:     Andrew Morton <akpm@...ux-foundation.org>, linux-mm@...ck.org
Cc:     linux-kernel@...r.kernel.org, Yu Zhao <yuzhao@...gle.com>,
        Andy Lutomirski <luto@...nel.org>,
        Peter Xu <peterx@...hat.com>,
        Pavel Emelyanov <xemul@...nvz.org>,
        Mike Kravetz <mike.kravetz@...cle.com>,
        Mike Rapoport <rppt@...ux.vnet.ibm.com>,
        Minchan Kim <minchan@...nel.org>,
        Will Deacon <will@...nel.org>,
        Peter Zijlstra <peterz@...radead.org>,
        Linus Torvalds <torvalds@...ux-foundation.org>,
        Hugh Dickins <hughd@...gle.com>,
        "Kirill A. Shutemov" <kirill@...temov.name>,
        Matthew Wilcox <willy@...radead.org>,
        Oleg Nesterov <oleg@...hat.com>, Jann Horn <jannh@...gle.com>,
        Kees Cook <keescook@...omium.org>,
        John Hubbard <jhubbard@...dia.com>,
        Leon Romanovsky <leonro@...dia.com>,
        Jason Gunthorpe <jgg@...pe.ca>, Jan Kara <jack@...e.cz>,
        Kirill Tkhai <ktkhai@...tuozzo.com>,
        Nadav Amit <nadav.amit@...il.com>, Jens Axboe <axboe@...nel.dk>
Subject: [PATCH 0/1] mm: restore full accuracy in COW page reuse

Hello Andrew and everyone,

Once we agree that COW page reuse requires full accuracy, the next
step is to re-apply 17839856fd588f4ab6b789f482ed3ffd7c403e1f and to
resume moving in that direction.

Who is going to orthogonally secure vmsplice, Andy, Jason, Jens?  Once
vmsplice is secured from taking unconstrained unprivileged long-term
GUP pins, the attack from child to parent can still happen in theory,
but statistically speaking once that huge window is closed, it won't
be a practical concern, so it'll give us time to perfect the full
solution by closing all windows in the VM core. vmsplice has to be
orthogonally fixed anyway, even if all windows were closed in VM core
first.

Unfortunately it's still not clear exactly what failed with
17839856fd588f4ab6b789f482ed3ffd7c403e1f but the whole point is that
we need to discuss that together.

Thanks,
Andrea

// SPDX-License-Identifier: GPL-3.0-or-later
/*
 *  reproducer for v5.11 O_DIRECT mm corruption with page_count
 *  instead of mapcount in do_wp_page.
 *
 *  Copyright (C) 2021  Red Hat, Inc.
 *
 *  gcc -O2 -o page_count_do_wp_page page_count_do_wp_page.c -lpthread
 *  page_count_do_wp_page ./whateverfile
 *
 *  NOTE: CONFIG_SOFT_DIRTY=y is required in the kernel config.
 */

#define _GNU_SOURCE
#include <stdbool.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <pthread.h>
#include <sys/mman.h>

#define PAGE_SIZE (1UL<<12)
#define HARDBLKSIZE 512

/*
 * Thread body: forever sleep a pseudo-random interval below one
 * millisecond (random() % 1000 microseconds), then store a zero into
 * the last byte of the page that starts at _mem.
 *
 * The store always writes 0, so it is the write access itself that
 * matters, not the value (presumably to keep provoking write faults
 * on the page while the other threads run).
 *
 * Never returns.
 */
static void* writer(void *_mem)
{
	char *last_byte = (char *)_mem + (PAGE_SIZE - 1);

	while (1) {
		usleep(random() % 1000);
		*last_byte = 0;
	}
}

/*
 * Thread body: write the single character "4" to a file descriptor
 * over and over, as fast as possible.
 *
 * data carries the descriptor itself, cast to a pointer by the
 * caller; main() passes the fd it opened on /proc/<pid>/clear_refs.
 *
 * Any write that does not transfer exactly one byte is fatal: the
 * error is reported with perror() and the whole process exits with
 * status 1. Otherwise the function never returns.
 */
static void* background_soft_dirty(void *data)
{
	long clear_refs_fd = (long) data;

	while (1) {
		if (write(clear_refs_fd, "4", 1) == 1)
			continue;
		perror("write soft dirty");
		exit(1);
	}
}

/*
 * Reproducer entry point.
 *
 * argv[1] names a scratch file that is created/truncated and opened
 * with O_DIRECT. One zero-filled page is written to it, then two
 * helper threads are started:
 *   - background_soft_dirty(), hammering /proc/<pid>/clear_refs;
 *   - writer(), periodically storing into the last byte of mem.
 *
 * The main thread then loops forever, reading the first HARDBLKSIZE
 * bytes of the file back into mem with O_DIRECT and checking that
 * the data really landed in mem. Any mismatch is reported on stdout.
 *
 * Layout of the 3-page buffer:
 *   mem                 page 0: O_DIRECT read target (and writer target)
 *   mem + PAGE_SIZE     page 1: all zeroes, the expected file content
 *   mem + PAGE_SIZE*2   page 2: HARDBLKSIZE bytes of 0xff then zeroes,
 *                               i.e. what page 0 looks like right
 *                               after the memset() at the loop bottom
 *
 * Only returns (via exit(1)) on a setup or I/O error.
 */
int main(int argc, char *argv[])
{
	/* Diagnostics belong on stderr, newline-terminated. */
	if (argc < 2)
		fprintf(stderr, "%s <filename>\n", argv[0]), exit(1);

	/* clear_refs of this very process; fed by background_soft_dirty() */
	char path[PAGE_SIZE];
	snprintf(path, sizeof(path), "/proc/%d/clear_refs", getpid());
	long soft_dirty_fd = open(path, O_WRONLY);
	if (soft_dirty_fd < 0)
		perror("open clear_refs"), exit(1);

	/* Page alignment also satisfies the O_DIRECT buffer alignment. */
	char *mem;
	if (posix_memalign((void **)&mem, PAGE_SIZE, PAGE_SIZE*3))
		perror("posix_memalign"), exit(1);
	/* THP is not using page_count so it would not corrupt memory */
	if (madvise(mem, PAGE_SIZE, MADV_NOHUGEPAGE))
		perror("madvise"), exit(1);
	bzero(mem, PAGE_SIZE * 3);
	memset(mem + PAGE_SIZE * 2, 0xff, HARDBLKSIZE);

	/*
	 * This is not specific to O_DIRECT. Even if O_DIRECT was
	 * forced to use PAGE_SIZE minimum granularity for reads, a
	 * recvmsg would create the same issue since it also uses
	 * iov_iter_get_pages internally to create transient GUP pins
	 * on anon memory.
	 */
	int fd = open(argv[1], O_DIRECT|O_CREAT|O_RDWR|O_TRUNC, 0600);
	if (fd < 0)
		perror("open"), exit(1);
	/* File content: one page of zeroes (page 0 is still all zero). */
	if (write(fd, mem, PAGE_SIZE) != PAGE_SIZE)
		perror("write"), exit(1);

	pthread_t soft_dirty;
	if (pthread_create(&soft_dirty, NULL,
			   background_soft_dirty, (void *)soft_dirty_fd))
		perror("soft_dirty"), exit(1);

	pthread_t thread;
	if (pthread_create(&thread, NULL, writer, mem))
		perror("pthread_create"), exit(1);

	/*
	 * skip_memset alternates every iteration. While it is true at
	 * the top of the loop, page 0 was NOT refilled with 0xff before
	 * this read, so it should already hold zeroes from the previous
	 * read.
	 */
	bool skip_memset = true;
	while (1) {
		if (pread(fd, mem, HARDBLKSIZE, 0) != HARDBLKSIZE)
			perror("read"), exit(1);
		/* The block just read must equal the zeroes of page 1. */
		if (memcmp(mem, mem+PAGE_SIZE, HARDBLKSIZE)) {
			if (memcmp(mem, mem+PAGE_SIZE*2, PAGE_SIZE)) {
				/*
				 * Page 0 is neither the file content nor
				 * the pristine 0xff pattern: dump it.
				 */
				if (skip_memset)
					printf("unexpected memory "
					       "corruption detected\n");
				else
					printf("memory corruption detected, "
					       "dumping page\n");
				/*
				 * If everything past the first block is
				 * still zero, dumping that block suffices.
				 */
				int end = PAGE_SIZE;
				if (!memcmp(mem+HARDBLKSIZE, mem+PAGE_SIZE,
					    PAGE_SIZE-HARDBLKSIZE))
					end = HARDBLKSIZE;
				/*
				 * Two hex digits per byte, unsigned: a
				 * plain char would sign-extend 0xff to
				 * ffffffff and the dump would be
				 * impossible to split back into bytes.
				 */
				for (int i = 0; i < end; i++)
					printf("%02x", (unsigned char)mem[i]);
				printf("\n");
			} else
				/*
				 * Page 0 still holds exactly the 0xff
				 * pattern: the read data never showed up.
				 */
				printf("memory corruption detected\n");
		}
		skip_memset = !skip_memset;
		if (!skip_memset)
			memset(mem, 0xff, HARDBLKSIZE);
	}

	return 0;
}

Andrea Arcangeli (1):
  mm: restore full accuracy in COW page reuse

 include/linux/ksm.h |  7 ++++++
 mm/ksm.c            | 25 +++++++++++++++++++
 mm/memory.c         | 59 ++++++++++++++++++++++++++++++++-------------
 3 files changed, 74 insertions(+), 17 deletions(-)

Powered by blists - more mailing lists