Date:   Tue, 24 Dec 2019 14:18:37 +0800
From:   Jason Wang <jasowang@...hat.com>
To:     Peter Xu <peterx@...hat.com>, kvm@...r.kernel.org,
        linux-kernel@...r.kernel.org
Cc:     Dr David Alan Gilbert <dgilbert@...hat.com>,
        Christophe de Dinechin <dinechin@...hat.com>,
        Sean Christopherson <sean.j.christopherson@...el.com>,
        Paolo Bonzini <pbonzini@...hat.com>,
        "Michael S. Tsirkin" <mst@...hat.com>,
        Vitaly Kuznetsov <vkuznets@...hat.com>
Subject: Re: [PATCH RESEND v2 15/17] KVM: selftests: Add dirty ring buffer
 test


On 2019/12/21 10:04 AM, Peter Xu wrote:
> Add the initial dirty ring buffer test.
>
> The current test implements userspace dirty ring collection by only
> reaping the dirty ring when the ring is full.
>
> So it's still running asynchronously like this:
>
>              vcpu                             main thread
>
>    1. vcpu dirties pages
>    2. vcpu gets dirty ring full
>       (userspace exit)
>
>                                         3. main thread waits until full
>                                            (so hardware buffers flushed)
>                                         4. main thread collects
>                                         5. main thread continues vcpu
>
>    6. vcpu continues, goes back to 1
>
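

Just to check my understanding of the handshake above - in terms of the
two semaphores the patch introduces, the flow is roughly (a sketch only,
with the names taken from the patch below):

        /* vcpu thread, on KVM_EXIT_DIRTY_RING_FULL */
        sem_post(&dirty_ring_vcpu_stop);
        sem_wait(&dirty_ring_vcpu_cont);

        /* main thread */
        sem_wait(&dirty_ring_vcpu_stop);
        /* ... collect the ring, then KVM_RESET_DIRTY_RINGS ... */
        sem_post(&dirty_ring_vcpu_cont);
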
> We can't directly collect dirty bits during vcpu execution, because
> otherwise we can't guarantee the hardware dirty bits were flushed
> when we collect, and since we're very strict about the dirty bits,
> the later verify procedure could fail.  A follow-up patch will make
> this test support async collection just like the existing dirty log
> test, by adding a vcpu kick mechanism.
>
> Signed-off-by: Peter Xu <peterx@...hat.com>
> ---
>   tools/testing/selftests/kvm/dirty_log_test.c  | 174 +++++++++++++++++-
>   .../testing/selftests/kvm/include/kvm_util.h  |   3 +
>   tools/testing/selftests/kvm/lib/kvm_util.c    |  56 ++++++
>   .../selftests/kvm/lib/kvm_util_internal.h     |   3 +
>   4 files changed, 234 insertions(+), 2 deletions(-)
>
> diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c
> index 3542311f56ff..af9b1a16c7d1 100644
> --- a/tools/testing/selftests/kvm/dirty_log_test.c
> +++ b/tools/testing/selftests/kvm/dirty_log_test.c
> @@ -12,8 +12,10 @@
>   #include <unistd.h>
>   #include <time.h>
>   #include <pthread.h>
> +#include <semaphore.h>
>   #include <linux/bitmap.h>
>   #include <linux/bitops.h>
> +#include <asm/barrier.h>
>   
>   #include "test_util.h"
>   #include "kvm_util.h"
> @@ -57,6 +59,8 @@
>   # define test_and_clear_bit_le	test_and_clear_bit
>   #endif
>   
> +#define TEST_DIRTY_RING_COUNT		1024
> +
>   /*
>    * Guest/Host shared variables. Ensure addr_gva2hva() and/or
>    * sync_global_to/from_guest() are used when accessing from
> @@ -128,6 +132,10 @@ static uint64_t host_dirty_count;
>   static uint64_t host_clear_count;
>   static uint64_t host_track_next_count;
>   
> +/* Whether dirty ring reset is requested, or finished */
> +static sem_t dirty_ring_vcpu_stop;
> +static sem_t dirty_ring_vcpu_cont;
> +
>   enum log_mode_t {
>   	/* Only use KVM_GET_DIRTY_LOG for logging */
>   	LOG_MODE_DIRTY_LOG = 0,
> @@ -135,6 +143,9 @@ enum log_mode_t {
>   	/* Use both KVM_[GET|CLEAR]_DIRTY_LOG for logging */
>  	LOG_MODE_CLEAR_LOG = 1,
>   
> +	/* Use dirty ring for logging */
> +	LOG_MODE_DIRTY_RING = 2,
> +
>   	LOG_MODE_NUM,
>   };
>   
> @@ -177,6 +188,118 @@ static void default_after_vcpu_run(struct kvm_vm *vm)
>   		    exit_reason_str(run->exit_reason));
>   }
>   
> +static void dirty_ring_create_vm_done(struct kvm_vm *vm)
> +{
> +	/*
> +	 * Switch to dirty ring mode after VM creation but before any
> +	 * of the vcpu creation.
> +	 */
> +	vm_enable_dirty_ring(vm, TEST_DIRTY_RING_COUNT *
> +			     sizeof(struct kvm_dirty_gfn));
> +}
> +
> +static uint32_t dirty_ring_collect_one(struct kvm_dirty_gfn *dirty_gfns,
> +				       struct kvm_dirty_ring_indices *indices,
> +				       int slot, void *bitmap,
> +				       uint32_t num_pages, int index)
> +{
> +	struct kvm_dirty_gfn *cur;
> +	uint32_t avail, fetch, count = 0;
> +
> +	/*
> +	 * Userspace should keep track of fetch_index itself, but to
> +	 * keep it simple here we read fetch_index back as well.
> +	 */
> +	fetch = READ_ONCE(indices->fetch_index);
> +	avail = READ_ONCE(indices->avail_index);
> +
> +	/* Make sure we always read valid ring entries */
> +	rmb();
> +
> +	DEBUG("ring %d: fetch: 0x%x, avail: 0x%x\n", index, fetch, avail);
> +
> +	while (fetch != avail) {
> +		cur = &dirty_gfns[fetch % TEST_DIRTY_RING_COUNT];
> +		TEST_ASSERT(cur->pad == 0, "Padding is non-zero: 0x%x", cur->pad);
> +		TEST_ASSERT(cur->slot == slot, "Slot number didn't match: "
> +			    "%u != %u", cur->slot, slot);
> +		TEST_ASSERT(cur->offset < num_pages, "Offset overflow: "
> +			    "0x%llx >= 0x%x", cur->offset, num_pages);
> +		DEBUG("fetch 0x%x offset 0x%llx\n", fetch, cur->offset);
> +		test_and_set_bit(cur->offset, bitmap);
> +		fetch++;


Any reason to use test_and_set_bit()? I guess set_bit() should be 
sufficient.
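
E.g. something like this should be enough (untested):

		set_bit(cur->offset, bitmap);

since nobody here cares about the old value of the bit.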


> +		count++;
> +	}
> +	WRITE_ONCE(indices->fetch_index, fetch);


Is WRITE_ONCE() a must here?
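
If not - e.g. if nothing else can race with this store - a plain
assignment would do (untested):

		indices->fetch_index = fetch;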


> +
> +	return count;
> +}
> +
> +static void dirty_ring_collect_dirty_pages(struct kvm_vm *vm, int slot,
> +					   void *bitmap, uint32_t num_pages)
> +{
> +	/* We only have one vcpu */
> +	struct kvm_run *state = vcpu_state(vm, VCPU_ID);
> +	uint32_t count = 0, cleared;
> +
> +	/*
> +	 * Before fetching the dirty pages, we need a vmexit of the
> +	 * worker vcpu to make sure the hardware dirty buffers were
> +	 * flushed.  This is not needed for dirty-log/clear-log tests
> +	 * because getting the dirty log will naturally do so.
> +	 *
> +	 * For now we do it the simple way - we just wait until
> +	 * the vcpu uses up the soft dirty ring, then it'll always
> +	 * do a vmexit to make sure that PML buffers will be flushed.
> +	 * In real hypervisors, we probably need a vcpu kick or to
> +	 * stop the vcpus (before the final sync) to make sure we'll
> +	 * get all the existing dirty PFNs even cached in hardware.
> +	 */
> +	sem_wait(&dirty_ring_vcpu_stop);
> +
> +	/* Only have one vcpu */
> +	count = dirty_ring_collect_one(vcpu_map_dirty_ring(vm, VCPU_ID),
> +				       &state->vcpu_ring_indices,
> +				       slot, bitmap, num_pages, VCPU_ID);
> +
> +	cleared = kvm_vm_reset_dirty_ring(vm);
> +
> +	/* Cleared pages should be the same as collected */
> +	TEST_ASSERT(cleared == count, "Reset dirty pages (%u) mismatch "
> +		    "with collected (%u)", cleared, count);
> +
> +	DEBUG("Notifying vcpu to continue\n");
> +	sem_post(&dirty_ring_vcpu_cont);
> +
> +	DEBUG("Iteration %ld collected %u pages\n", iteration, count);
> +}
> +
> +static void dirty_ring_after_vcpu_run(struct kvm_vm *vm)
> +{
> +	struct kvm_run *run = vcpu_state(vm, VCPU_ID);
> +
> +	/* A ucall-sync or ring-full event is allowed */
> +	if (get_ucall(vm, VCPU_ID, NULL) == UCALL_SYNC) {
> +		/* We should allow this to continue */
> +		;
> +	} else if (run->exit_reason == KVM_EXIT_DIRTY_RING_FULL) {
> +		sem_post(&dirty_ring_vcpu_stop);
> +		DEBUG("vcpu stops because dirty ring is full...\n");
> +		sem_wait(&dirty_ring_vcpu_cont);
> +		DEBUG("vcpu continues now.\n");
> +	} else {
> +		TEST_ASSERT(false, "Invalid guest sync status: "
> +			    "exit_reason=%s\n",
> +			    exit_reason_str(run->exit_reason));
> +	}
> +}
> +
> +static void dirty_ring_before_vcpu_join(void)
> +{
> +	/* Kick the vcpu once more so that it is guaranteed to quit */
> +	sem_post(&dirty_ring_vcpu_cont);
> +}
> +
>   struct log_mode {
>   	const char *name;
>   	/* Hook when the vm creation is done (before vcpu creation) */
> @@ -186,6 +309,7 @@ struct log_mode {
>   				     void *bitmap, uint32_t num_pages);
>  	/* Hook to call after each vcpu run */
>   	void (*after_vcpu_run)(struct kvm_vm *vm);
> +	void (*before_vcpu_join)(void);
>   } log_modes[LOG_MODE_NUM] = {
>   	{
>   		.name = "dirty-log",
> @@ -199,6 +323,13 @@ struct log_mode {
>   		.collect_dirty_pages = clear_log_collect_dirty_pages,
>   		.after_vcpu_run = default_after_vcpu_run,
>   	},
> +	{
> +		.name = "dirty-ring",
> +		.create_vm_done = dirty_ring_create_vm_done,
> +		.collect_dirty_pages = dirty_ring_collect_dirty_pages,
> +		.before_vcpu_join = dirty_ring_before_vcpu_join,
> +		.after_vcpu_run = dirty_ring_after_vcpu_run,
> +	},
>   };
>   
>   /*
> @@ -245,6 +376,14 @@ static void log_mode_after_vcpu_run(struct kvm_vm *vm)
>   		mode->after_vcpu_run(vm);
>   }
>   
> +static void log_mode_before_vcpu_join(void)
> +{
> +	struct log_mode *mode = &log_modes[host_log_mode];
> +
> +	if (mode->before_vcpu_join)
> +		mode->before_vcpu_join();
> +}
> +
>   static void generate_random_array(uint64_t *guest_array, uint64_t size)
>   {
>   	uint64_t i;
> @@ -292,14 +431,41 @@ static void vm_dirty_log_verify(unsigned long *bmap)
>   		}
>   
>   		if (test_and_clear_bit_le(page, bmap)) {
> +			bool matched;
> +
>   			host_dirty_count++;
> +
>   			/*
>   			 * If the bit is set, the value written onto
>   			 * the corresponding page should be either the
>   			 * previous iteration number or the current one.
> +			 *
> +			 * The (*value_ptr == iteration - 2) case is
> +			 * special to the dirty ring test, where the
> +			 * page is the last page written before a kvm
> +			 * dirty ring full userspace exit in the 2nd
> +			 * iteration; without this we'd probably fail
> +			 * on the 4th iteration.  Anyway, let's just
> +			 * loosen the check a little bit for all modes,
> +			 * for simplicity.
>   			 */
> -			TEST_ASSERT(*value_ptr == iteration ||
> -				    *value_ptr == iteration - 1,
> +			matched = (*value_ptr == iteration ||
> +				   *value_ptr == iteration - 1 ||
> +				   *value_ptr == iteration - 2);
> +
> +			/*
> +			 * This is the common path for dirty ring
> +			 * where this page is exactly the last page
> +			 * touched before KVM_EXIT_DIRTY_RING_FULL.
> +			 * If it happens, we should expect it to be
> +			 * there for the next round.
> +			 */
> +			if (host_log_mode == LOG_MODE_DIRTY_RING && !matched) {
> +				set_bit_le(page, host_bmap_track);
> +				continue;
> +			}
> +
> +			TEST_ASSERT(matched,
>   				    "Set page %"PRIu64" value %"PRIu64
>   				    " incorrect (iteration=%"PRIu64")",
>   				    page, *value_ptr, iteration);
> @@ -460,6 +626,7 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
>   
>   	/* Tell the vcpu thread to quit */
>   	host_quit = true;
> +	log_mode_before_vcpu_join();
>   	pthread_join(vcpu_thread, NULL);
>   
>   	DEBUG("Total bits checked: dirty (%"PRIu64"), clear (%"PRIu64"), "
> @@ -524,6 +691,9 @@ int main(int argc, char *argv[])
>   	unsigned int host_ipa_limit;
>   #endif
>   
> +	sem_init(&dirty_ring_vcpu_stop, 0, 0);
> +	sem_init(&dirty_ring_vcpu_cont, 0, 0);
> +
>   #ifdef __x86_64__
>   	vm_guest_mode_params_init(VM_MODE_PXXV48_4K, true, true);
>   #endif
> diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
> index 29cccaf96baf..4b78a8d3e773 100644
> --- a/tools/testing/selftests/kvm/include/kvm_util.h
> +++ b/tools/testing/selftests/kvm/include/kvm_util.h
> @@ -67,6 +67,7 @@ enum vm_mem_backing_src_type {
>   
>   int kvm_check_cap(long cap);
>   int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap);
> +void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size);
>   
>   struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm);
>   struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm);
> @@ -76,6 +77,7 @@ void kvm_vm_release(struct kvm_vm *vmp);
>   void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log);
>   void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log,
>   			    uint64_t first_page, uint32_t num_pages);
> +uint32_t kvm_vm_reset_dirty_ring(struct kvm_vm *vm);
>   
>   int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, const vm_vaddr_t gva,
>   		       size_t len);
> @@ -137,6 +139,7 @@ void vcpu_nested_state_get(struct kvm_vm *vm, uint32_t vcpuid,
>   int vcpu_nested_state_set(struct kvm_vm *vm, uint32_t vcpuid,
>   			  struct kvm_nested_state *state, bool ignore_error);
>   #endif
> +void *vcpu_map_dirty_ring(struct kvm_vm *vm, uint32_t vcpuid);
>   
>   const char *exit_reason_str(unsigned int exit_reason);
>   
> diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
> index 41cf45416060..a119717bc84c 100644
> --- a/tools/testing/selftests/kvm/lib/kvm_util.c
> +++ b/tools/testing/selftests/kvm/lib/kvm_util.c
> @@ -85,6 +85,26 @@ int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap)
>   	return ret;
>   }
>   
> +void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size)
> +{
> +	struct kvm_enable_cap cap = {};
> +	int ret;
> +
> +	ret = kvm_check_cap(KVM_CAP_DIRTY_LOG_RING);
> +
> +	TEST_ASSERT(ret >= 0, "KVM_CAP_DIRTY_LOG_RING");
> +
> +	if (ret == 0) {
> +		fprintf(stderr, "KVM does not support dirty ring, skipping tests\n");
> +		exit(KSFT_SKIP);
> +	}
> +
> +	cap.cap = KVM_CAP_DIRTY_LOG_RING;
> +	cap.args[0] = ring_size;
> +	vm_enable_cap(vm, &cap);
> +	vm->dirty_ring_size = ring_size;
> +}
> +
>   static void vm_open(struct kvm_vm *vm, int perm)
>   {
>   	vm->kvm_fd = open(KVM_DEV_PATH, perm);
> @@ -297,6 +317,11 @@ void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log,
>   		    strerror(-ret));
>   }
>   
> +uint32_t kvm_vm_reset_dirty_ring(struct kvm_vm *vm)
> +{
> +	return ioctl(vm->fd, KVM_RESET_DIRTY_RINGS);
> +}
> +
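

One nit: the return value of KVM_RESET_DIRTY_RINGS is later compared
against the collected count, so it may be worth asserting the ioctl
itself didn't fail, e.g. (a sketch, untested):

	int ret = ioctl(vm->fd, KVM_RESET_DIRTY_RINGS);

	TEST_ASSERT(ret >= 0, "KVM_RESET_DIRTY_RINGS failed, rc: %i "
		    "errno: %i", ret, errno);
	return ret;
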
>   /*
>    * Userspace Memory Region Find
>    *
> @@ -408,6 +433,13 @@ static void vm_vcpu_rm(struct kvm_vm *vm, uint32_t vcpuid)
>   	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
>   	int ret;
>   
> +	if (vcpu->dirty_gfns) {
> +		ret = munmap(vcpu->dirty_gfns, vm->dirty_ring_size);
> +		TEST_ASSERT(ret == 0, "munmap of VCPU dirty ring failed, "
> +			    "rc: %i errno: %i", ret, errno);
> +		vcpu->dirty_gfns = NULL;
> +	}
> +
>   	ret = munmap(vcpu->state, sizeof(*vcpu->state));
>   	TEST_ASSERT(ret == 0, "munmap of VCPU fd failed, rc: %i "
>   		"errno: %i", ret, errno);
> @@ -1409,6 +1441,29 @@ int _vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid,
>   	return ret;
>   }
>   
> +void *vcpu_map_dirty_ring(struct kvm_vm *vm, uint32_t vcpuid)
> +{
> +	struct vcpu *vcpu;
> +	uint32_t size = vm->dirty_ring_size;
> +
> +	TEST_ASSERT(size > 0, "Should enable dirty ring first");
> +
> +	vcpu = vcpu_find(vm, vcpuid);
> +
> +	TEST_ASSERT(vcpu, "Cannot find vcpu %u", vcpuid);
> +
> +	if (!vcpu->dirty_gfns) {
> +		vcpu->dirty_gfns_count = size / sizeof(struct kvm_dirty_gfn);
> +		vcpu->dirty_gfns = mmap(NULL, size, PROT_READ | PROT_WRITE,
> +					MAP_SHARED, vcpu->fd, vm->page_size *
> +					KVM_DIRTY_LOG_PAGE_OFFSET);


It looks to me that we never write to the dirty_gfns ring, so PROT_READ
should be sufficient.
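
I.e. (untested):

		vcpu->dirty_gfns = mmap(NULL, size, PROT_READ, MAP_SHARED,
					vcpu->fd, vm->page_size *
					KVM_DIRTY_LOG_PAGE_OFFSET);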

Thanks


> +		TEST_ASSERT(vcpu->dirty_gfns != MAP_FAILED,
> +			    "Dirty ring map failed");
> +	}
> +
> +	return vcpu->dirty_gfns;
> +}
> +
>   /*
>    * VM Ioctl
>    *
> @@ -1503,6 +1558,7 @@ static struct exit_reason {
>   	{KVM_EXIT_INTERNAL_ERROR, "INTERNAL_ERROR"},
>   	{KVM_EXIT_OSI, "OSI"},
>   	{KVM_EXIT_PAPR_HCALL, "PAPR_HCALL"},
> +	{KVM_EXIT_DIRTY_RING_FULL, "DIRTY_RING_FULL"},
>   #ifdef KVM_EXIT_MEMORY_NOT_PRESENT
>   	{KVM_EXIT_MEMORY_NOT_PRESENT, "MEMORY_NOT_PRESENT"},
>   #endif
> diff --git a/tools/testing/selftests/kvm/lib/kvm_util_internal.h b/tools/testing/selftests/kvm/lib/kvm_util_internal.h
> index ac50c42750cf..87edcc6746a2 100644
> --- a/tools/testing/selftests/kvm/lib/kvm_util_internal.h
> +++ b/tools/testing/selftests/kvm/lib/kvm_util_internal.h
> @@ -39,6 +39,8 @@ struct vcpu {
>   	uint32_t id;
>   	int fd;
>   	struct kvm_run *state;
> +	struct kvm_dirty_gfn *dirty_gfns;
> +	uint32_t dirty_gfns_count;
>   };
>   
>   struct kvm_vm {
> @@ -61,6 +63,7 @@ struct kvm_vm {
>   	vm_paddr_t pgd;
>   	vm_vaddr_t gdt;
>   	vm_vaddr_t tss;
> +	uint32_t dirty_ring_size;
>   };
>   
>   struct vcpu *vcpu_find(struct kvm_vm *vm, uint32_t vcpuid);
