linux-kernel - Re: [PATCH v5 7/7] KVM: selftests: Add an NX huge pages jitter test

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <aIFOV4ydqsyDH72G@google.com>
Date: Wed, 23 Jul 2025 14:04:23 -0700
From: Sean Christopherson <seanjc@...gle.com>
To: James Houghton <jthoughton@...gle.com>
Cc: Paolo Bonzini <pbonzini@...hat.com>, Vipin Sharma <vipinsh@...gle.com>, 
	David Matlack <dmatlack@...gle.com>, kvm@...r.kernel.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH v5 7/7] KVM: selftests: Add an NX huge pages jitter test

On Mon, Jul 07, 2025, James Houghton wrote:
> +		/*
> +		 * To time the jitter on all faults on pages that are not
> +		 * undergoing nx huge page recovery, only execute on every
> +		 * other 1G region, and only time the non-executing pass.
> +		 */
> +		if (page & (1UL << 18)) {

This needs a #define or helper, I have no idea what 1 << 18 is doing.

> +			uint64_t tsc1, tsc2;
> +
> +			tsc1 = rdtsc();
> +			*gva = 0;
> +			tsc2 = rdtsc();
> +
> +			if (tsc2 - tsc1 > max_cycles)
> +				max_cycles = tsc2 - tsc1;
> +		} else {
> +			*gva = RETURN_OPCODE;
> +			((void (*)(void)) gva)();
> +		}
> +	}
> +
> +	GUEST_SYNC1(max_cycles);
> +}
> +
> +struct kvm_vm *create_vm(uint64_t memory_bytes,
> +			 enum vm_mem_backing_src_type backing_src)
> +{
> +	uint64_t backing_src_pagesz = get_backing_src_pagesz(backing_src);
> +	struct guest_args *args = &guest_args;
> +	uint64_t guest_num_pages;
> +	uint64_t region_end_gfn;
> +	uint64_t gpa, size;
> +	struct kvm_vm *vm;
> +
> +	args->guest_page_size = getpagesize();
> +
> +	guest_num_pages = vm_adjust_num_guest_pages(VM_MODE_DEFAULT,
> +				memory_bytes / args->guest_page_size);
> +
> +	TEST_ASSERT(memory_bytes % getpagesize() == 0,
> +		    "Guest memory size is not host page size aligned.");
> +
> +	vm = __vm_create_with_one_vcpu(&vcpu, guest_num_pages, guest_code);
> +
> +	/* Put the test region at the top guest physical memory. */
> +	region_end_gfn = vm->max_gfn + 1;
> +
> +	/*
> +	 * If there should be more memory in the guest test region than there
> +	 * can be pages in the guest, it will definitely cause problems.
> +	 */
> +	TEST_ASSERT(guest_num_pages < region_end_gfn,
> +		    "Requested more guest memory than address space allows.\n"
> +		    "    guest pages: %" PRIx64 " max gfn: %" PRIx64
> +		    " wss: %" PRIx64 "]",

Don't wrap this last one.

> +		    guest_num_pages, region_end_gfn - 1, memory_bytes);
> +
> +	gpa = (region_end_gfn - guest_num_pages - 1) * args->guest_page_size;
> +	gpa = align_down(gpa, backing_src_pagesz);
> +
> +	size = guest_num_pages * args->guest_page_size;
> +	pr_info("guest physical test memory: [0x%lx, 0x%lx)\n",
> +		gpa, gpa + size);

And don't wrap here either (82 chars is totally fine).

> +
> +	/*
> +	 * Pass in MAP_POPULATE, because we are trying to test how long
> +	 * we have to wait for a pending NX huge page recovery to take.
> +	 * We do not want to also wait for GUP itself.
> +	 */

Right, but we also don't want to wait for the initial fault-in either, no?  I.e.
plumbing in MAP_POPULATE only fixes the worst of the delay, and maybe only with
the TDP MMU enabled.

In other words, it seems like we need a helper (option?) to excplitly "prefault",
all memory from within the guest, not the ability to specify MAP_POPULATE.

> +	vm_mem_add(vm, backing_src, gpa, 1,
> +		   guest_num_pages, 0, -1, 0, MAP_POPULATE);
> +
> +	virt_map(vm, guest_test_virt_mem, gpa, guest_num_pages);
> +
> +	args->pages = guest_num_pages;
> +
> +	/* Export the shared variables to the guest. */
> +	sync_global_to_guest(vm, guest_args);
> +
> +	return vm;
> +}
> +
> +static void run_vcpu(struct kvm_vcpu *vcpu)
> +{
> +	struct timespec ts_elapsed;
> +	struct timespec ts_start;
> +	struct ucall uc = {};
> +	int ret;
> +
> +	clock_gettime(CLOCK_MONOTONIC, &ts_start);
> +
> +	ret = _vcpu_run(vcpu);
> +
> +	ts_elapsed = timespec_elapsed(ts_start);
> +
> +	TEST_ASSERT(ret == 0, "vcpu_run failed: %d", ret);
> +
> +	TEST_ASSERT(get_ucall(vcpu, &uc) == UCALL_SYNC,
> +		    "Invalid guest sync status: %" PRIu64, uc.cmd);
> +
> +	pr_info("Duration: %ld.%09lds\n",
> +		ts_elapsed.tv_sec, ts_elapsed.tv_nsec);
> +	pr_info("Max fault latency: %" PRIu64 " cycles\n", uc.args[0]);
> +}
> +
> +static void run_test(struct test_params *params)
> +{
> +	/*
> +	 * The fault + execute pattern in the guest relies on having more than
> +	 * 1GiB to use.
> +	 */
> +	TEST_ASSERT(params->memory_bytes > PAGE_SIZE << 18,

Oooh, the 1 << 18 is 1GiB on PFNs.  Ugh.  Just use SZ_1G here.  And assert immediate
after setting params.memory_bytes, don't wait until the test runs.

> +		    "Must use more than 1GiB of memory.");
> +
> +	create_vm(params->memory_bytes, params->backing_src);
> +
> +	pr_info("\n");
> +
> +	run_vcpu(vcpu);
> +}
> +
> +static void help(char *name)
> +{
> +	puts("");
> +	printf("usage: %s [-h] [-b bytes] [-s mem_type]\n",
> +	       name);
> +	puts("");
> +	printf(" -h: Display this help message.");
> +	printf(" -b: specify the size of the memory region which should be\n"
> +	       "     dirtied by the guest. e.g. 2048M or 3G.\n"
> +	       "     (default: 2G, must be greater than 1G)\n");
> +	backing_src_help("-s");
> +	puts("");
> +	exit(0);
> +}
> +
> +int main(int argc, char *argv[])
> +{
> +	struct test_params params = {
> +		.backing_src = DEFAULT_VM_MEM_SRC,
> +		.memory_bytes = DEFAULT_TEST_MEM_SIZE,
> +	};
> +	int opt;
> +
> +	while ((opt = getopt(argc, argv, "hb:s:")) != -1) {
> +		switch (opt) {
> +		case 'b':
> +			params.memory_bytes = parse_size(optarg);
> +			break;
> +		case 's':
> +			params.backing_src = parse_backing_src_type(optarg);
> +			break;
> +		case 'h':
> +		default:
> +			help(argv[0]);
> +			break;
> +		}
> +	}
> +
> +	run_test(&params);
> +}
> -- 
> 2.50.0.727.gbf7dc18ff4-goog
>