linux-kernel - [PATCH v2 8/8] DO NOT MERGE: Hack-a-test to verify gpc invalidation+refresh

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20220427014004.1992589-9-seanjc@google.com>
Date:   Wed, 27 Apr 2022 01:40:04 +0000
From:   Sean Christopherson <seanjc@...gle.com>
To:     Paolo Bonzini <pbonzini@...hat.com>
Cc:     Sean Christopherson <seanjc@...gle.com>,
        Vitaly Kuznetsov <vkuznets@...hat.com>,
        Wanpeng Li <wanpengli@...cent.com>,
        Jim Mattson <jmattson@...gle.com>,
        Joerg Roedel <joro@...tes.org>, kvm@...r.kernel.org,
        linux-kernel@...r.kernel.org, David Woodhouse <dwmw@...zon.co.uk>,
        Mingwei Zhang <mizhang@...gle.com>,
        Maxim Levitsky <mlevitsk@...hat.com>
Subject: [PATCH v2 8/8] DO NOT MERGE: Hack-a-test to verify gpc invalidation+refresh

Add a VM-wide gfn=>pfn cache and a fake MSR to let userspace control the
cache.  On writes, reflect the value of the MSR into the backing page of
a gfn=>pfn cache so that userspace can detect if a value was written to
the wrong page, i.e. to a stale mapping.

Spin up 16 vCPUs (arbitrary) to use/refresh the cache, and another thread
to trigger mmu_notifier events and memslot updates.

Not-signed-off-by: Sean Christopherson <seanjc@...gle.com>
---
 arch/x86/kvm/x86.c                     |  30 ++++
 include/linux/kvm_host.h               |   2 +
 tools/testing/selftests/kvm/.gitignore |   1 +
 tools/testing/selftests/kvm/Makefile   |   2 +
 tools/testing/selftests/kvm/gpc_test.c | 217 +++++++++++++++++++++++++
 virt/kvm/pfncache.c                    |   2 +
 6 files changed, 254 insertions(+)
 create mode 100644 tools/testing/selftests/kvm/gpc_test.c

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 951d0a78ccda..7afdb7f39821 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3473,6 +3473,20 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		return kvm_xen_write_hypercall_page(vcpu, data);
 
 	switch (msr) {
+	case 0xdeadbeefu: {
+		struct gfn_to_pfn_cache *gpc = &vcpu->kvm->test_cache;
+		unsigned long flags;
+
+		if (kvm_gfn_to_pfn_cache_refresh(vcpu->kvm, gpc, data, 8))
+			break;
+
+		read_lock_irqsave(&gpc->lock, flags);
+		if (kvm_gfn_to_pfn_cache_check(vcpu->kvm, gpc, data, 8))
+			*(u64 *)(gpc->khva) = data;
+		read_unlock_irqrestore(&gpc->lock, flags);
+		break;
+	}
+
 	case MSR_AMD64_NB_CFG:
 	case MSR_IA32_UCODE_WRITE:
 	case MSR_VM_HSAVE_PA:
@@ -3825,6 +3839,19 @@ static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host)
 int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 {
 	switch (msr_info->index) {
+	case 0xdeadbeefu: {
+		struct gfn_to_pfn_cache *gpc = &vcpu->kvm->test_cache;
+		unsigned long flags;
+
+		read_lock_irqsave(&gpc->lock, flags);
+		if (kvm_gfn_to_pfn_cache_check(vcpu->kvm, gpc, gpc->gpa, 8))
+			msr_info->data = gpc->gpa;
+		else
+			msr_info->data = 0xdeadbeefu;
+		read_unlock_irqrestore(&gpc->lock, flags);
+		return 0;
+	}
+
 	case MSR_IA32_PLATFORM_ID:
 	case MSR_IA32_EBL_CR_POWERON:
 	case MSR_IA32_LASTBRANCHFROMIP:
@@ -11794,6 +11821,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	kvm_hv_init_vm(kvm);
 	kvm_xen_init_vm(kvm);
 
+	kvm_gfn_to_pfn_cache_init(kvm, &kvm->test_cache, NULL,
+				  KVM_HOST_USES_PFN, 0, 0);
+
 	return static_call(kvm_x86_vm_init)(kvm);
 
 out_page_track:
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 252ee4a61b58..88ed76ad8bc7 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -718,6 +718,8 @@ struct kvm {
 	spinlock_t gpc_lock;
 	struct list_head gpc_list;
 
+	struct gfn_to_pfn_cache test_cache;
+
 	/*
 	 * created_vcpus is protected by kvm->lock, and is incremented
 	 * at the beginning of KVM_CREATE_VCPU.  online_vcpus is only
diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index 56140068b763..0310a57a1a4f 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -70,3 +70,4 @@
 /steal_time
 /kvm_binary_stats_test
 /system_counter_offset_test
+/gpc_test
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index af582d168621..0adc9ac954d1 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -104,6 +104,8 @@ TEST_GEN_PROGS_x86_64 += steal_time
 TEST_GEN_PROGS_x86_64 += kvm_binary_stats_test
 TEST_GEN_PROGS_x86_64 += system_counter_offset_test
 
+TEST_GEN_PROGS_x86_64 += gpc_test
+
 TEST_GEN_PROGS_aarch64 += aarch64/arch_timer
 TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions
 TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list
diff --git a/tools/testing/selftests/kvm/gpc_test.c b/tools/testing/selftests/kvm/gpc_test.c
new file mode 100644
index 000000000000..5c509e7bb4da
--- /dev/null
+++ b/tools/testing/selftests/kvm/gpc_test.c
@@ -0,0 +1,217 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <errno.h>
+#include <fcntl.h>
+#include <pthread.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <signal.h>
+#include <syscall.h>
+#include <sys/ioctl.h>
+#include <sys/sysinfo.h>
+#include <asm/barrier.h>
+#include <linux/atomic.h>
+#include <linux/rseq.h>
+#include <linux/unistd.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "test_util.h"
+
+#define NR_VCPUS 16
+
+#define NR_ITERATIONS	1000
+
+#define PAGE_SIZE 4096
+
+#ifndef MAP_FIXED_NOREPLACE
+#define MAP_FIXED_NOREPLACE	0x100000
+#endif
+
+static const uint64_t gpa_base = (4ull * (1 << 30));
+
+static struct kvm_vm *vm;
+
+static pthread_t memory_thread;
+static pthread_t vcpu_threads[NR_VCPUS];
+
+static bool fight;
+
+static uint64_t per_vcpu_gpa_aligned(int vcpu_id)
+{
+	return gpa_base + (vcpu_id * PAGE_SIZE);
+}
+
+static uint64_t per_vcpu_gpa(int vcpu_id)
+{
+	return per_vcpu_gpa_aligned(vcpu_id) + vcpu_id;
+}
+
+static void guest_code(int vcpu_id)
+{
+	uint64_t this_vcpu_gpa;
+	int i;
+
+	this_vcpu_gpa = per_vcpu_gpa(vcpu_id);
+
+	for (i = 0; i < NR_ITERATIONS; i++)
+		wrmsr(0xdeadbeefu, this_vcpu_gpa);
+	GUEST_SYNC(0);
+}
+
+static void *memory_worker(void *ign)
+{
+	int i, x, r, k;
+	uint64_t *hva;
+	uint64_t gpa;
+	void *mem;
+
+	while (!READ_ONCE(fight))
+		cpu_relax();
+
+	for (k = 0; k < 50; k++) {
+		i = (unsigned int)random() % NR_VCPUS;
+
+		gpa = per_vcpu_gpa_aligned(i);
+		hva = (void *)gpa;
+
+		x = (unsigned int)random() % 5;
+		switch (x) {
+		case 0:
+			r = munmap(hva, PAGE_SIZE);
+			TEST_ASSERT(!r, "Failed to mumap (hva = %lx), errno = %d (%s)",
+				    (unsigned long)hva, errno, strerror(errno));
+
+			mem = mmap(hva, PAGE_SIZE, PROT_READ | PROT_WRITE,
+				MAP_FIXED_NOREPLACE | MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+			TEST_ASSERT(mem != MAP_FAILED || mem != hva,
+				    "Failed to mmap (hva = %lx), errno = %d (%s)",
+				    (unsigned long)hva, errno, strerror(errno));
+			break;
+		case 1:
+			vm_set_user_memory_region(vm, i + 1, KVM_MEM_LOG_DIRTY_PAGES,
+						  gpa, PAGE_SIZE, hva);
+			vm_set_user_memory_region(vm, i + 1, 0, gpa, PAGE_SIZE, hva);
+			break;
+		case 2:
+			r = mprotect(hva, PAGE_SIZE, PROT_NONE);
+			TEST_ASSERT(!r, "Failed to mprotect (hva = %lx), errno = %d (%s)",
+				    (unsigned long)hva, errno, strerror(errno));
+
+			r = mprotect(hva, PAGE_SIZE, PROT_READ | PROT_WRITE);
+			TEST_ASSERT(!r, "Failed to mprotect (hva = %lx), errno = %d (%s)",
+				    (unsigned long)hva, errno, strerror(errno));
+			break;
+		case 3:
+			r = mprotect(hva, PAGE_SIZE, PROT_READ);
+			TEST_ASSERT(!r, "Failed to mprotect (hva = %lx), errno = %d (%s)",
+				    (unsigned long)hva, errno, strerror(errno));
+
+			r = mprotect(hva, PAGE_SIZE, PROT_READ | PROT_WRITE);
+			TEST_ASSERT(!r, "Failed to mprotect (hva = %lx), errno = %d (%s)",
+				    (unsigned long)hva, errno, strerror(errno));
+			break;
+		case 4:
+			vm_set_user_memory_region(vm, i + 1, 0, gpa, 0, 0);
+			vm_set_user_memory_region(vm, i + 1, 0, gpa, PAGE_SIZE,
+						  (void *)per_vcpu_gpa_aligned(NR_VCPUS));
+			vm_set_user_memory_region(vm, i + 1, 0, gpa, 0, 0);
+			vm_set_user_memory_region(vm, i + 1, 0, gpa, PAGE_SIZE, hva);
+			break;
+		}
+	}
+	return NULL;
+}
+
+static void sync_guest(int vcpu_id)
+{
+	struct ucall uc;
+
+	switch (get_ucall(vm, vcpu_id, &uc)) {
+	case UCALL_SYNC:
+		TEST_ASSERT(uc.args[1] == 0,
+			   "Unexpected sync ucall, got %lx", uc.args[1]);
+		break;
+	case UCALL_ABORT:
+			TEST_FAIL("%s at %s:%ld\n\tvalues: %#lx, %#lx",
+		(const char *)uc.args[0],
+		__FILE__, uc.args[1], uc.args[2], uc.args[3]);
+		break;
+	default:
+		TEST_FAIL("Unexpected userspace exit, reason = %s\n",
+			  exit_reason_str(vcpu_state(vm, vcpu_id)->exit_reason));
+		break;
+	}
+}
+
+static void *vcpu_worker(void *data)
+{
+	int vcpu_id = (unsigned long)data;
+
+	vcpu_args_set(vm, vcpu_id, 1, vcpu_id);
+
+	while (!READ_ONCE(fight))
+		cpu_relax();
+
+	usleep(10);
+
+	vcpu_run(vm, vcpu_id);
+
+	sync_guest(vcpu_id);
+
+	return NULL;
+}
+
+int main(int argc, char *argv[])
+{
+	uint64_t *hva;
+	uint64_t gpa;
+	void *r;
+	int i;
+
+	srandom(time(0));
+
+	vm = vm_create_default_with_vcpus(NR_VCPUS, 0, 0, guest_code, NULL);
+	ucall_init(vm, NULL);
+
+	pthread_create(&memory_thread, NULL, memory_worker, 0);
+
+	for (i = 0; i < NR_VCPUS; i++) {
+		pthread_create(&vcpu_threads[i], NULL, vcpu_worker, (void *)(unsigned long)i);
+
+		gpa = per_vcpu_gpa_aligned(i);
+		hva = (void *)gpa;
+		r = mmap(hva, PAGE_SIZE, PROT_READ | PROT_WRITE,
+			 MAP_FIXED_NOREPLACE | MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+		TEST_ASSERT(r != MAP_FAILED, "mmap() '%lx' failed, errno = %d (%s)",
+			    gpa, errno, strerror(errno));
+
+		vm_set_user_memory_region(vm, i + 1, 0, gpa, PAGE_SIZE, hva);
+	}
+
+	WRITE_ONCE(fight, true);
+
+	for (i = 0; i < NR_VCPUS; i++)
+		pthread_join(vcpu_threads[i], NULL);
+
+	pthread_join(memory_thread, NULL);
+
+	for (i = 0; i < NR_VCPUS; i++) {
+		gpa = per_vcpu_gpa(i);
+		hva = (void *)gpa;
+
+		TEST_ASSERT(*hva == 0 || *hva == gpa,
+			    "Want '0' or '%lx', got '%lx'\n", gpa, *hva);
+	}
+
+	gpa = vcpu_get_msr(vm, 0, 0xdeadbeefu);
+	hva = (void *)gpa;
+	if (gpa != 0xdeadbeefu)
+		TEST_ASSERT(*hva == gpa, "Want '%lx', got '%lx'\n", gpa, *hva);
+
+	kvm_vm_free(vm);
+
+	return 0;
+}
diff --git a/virt/kvm/pfncache.c b/virt/kvm/pfncache.c
index 3cb439b505b4..7881e6e6d91a 100644
--- a/virt/kvm/pfncache.c
+++ b/virt/kvm/pfncache.c
@@ -372,6 +372,8 @@ int kvm_gfn_to_pfn_cache_init(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
 		list_add(&gpc->list, &kvm->gpc_list);
 		spin_unlock(&kvm->gpc_lock);
 	}
+	if (!len)
+		return -EINVAL;
 	return kvm_gfn_to_pfn_cache_refresh(kvm, gpc, gpa, len);
 }
 EXPORT_SYMBOL_GPL(kvm_gfn_to_pfn_cache_init);
-- 
2.36.0.rc2.479.g8af0fa9b8e-goog