Message-ID: <20250930172850.598938-1-jthoughton@google.com>
Date: Tue, 30 Sep 2025 17:28:49 +0000
From: James Houghton <jthoughton@...gle.com>
To: Paolo Bonzini <pbonzini@...hat.com>, Sean Christopherson <seanjc@...gle.com>
Cc: James Houghton <jthoughton@...gle.com>, kvm@...r.kernel.org,
linux-kernel@...r.kernel.org
Subject: [PATCH 1/2] KVM: For manual-protect GET_DIRTY_LOG, do not hold slots lock

For users that have enabled manual-protect, holding the srcu lock
instead of the slots lock allows KVM to copy the dirty bitmap for
multiple memslots in parallel.
Userspace can take advantage of this by creating multiple memslots and
calling GET_DIRTY_LOG on all of them at the same time, reducing the
dirty log collection time.
For VM live migration, the final dirty memory state can only be
collected after the VM has been paused (blackout). We can resume the VM
on the target host without this bitmap, but doing so forces the
post-copy implementation to assume that nothing is clean, which is very
slow. Receiving the bitmap sooner therefore dramatically improves VM
responsiveness.

On 12TiB Cascade Lake hosts, we observe GET_DIRTY_LOG times of about
25-40ms for each memslot when splitting the memslots 8 ways. This patch
reduces the total wall time spent calling GET_DIRTY_LOG from ~300ms to
~40ms. This means that the dirty log can be transferred to the target
~250ms faster, which is a significant responsiveness improvement. It
takes about 800ms to send the bitmap to the target today, so the 250ms
improvement represents a ~20% reduction in total time spent without the
dirty bitmap.

The operations that must be safe with only kvm->srcu held are:

1. The copy_to_user() that stores the bitmap
2. kvm_arch_sync_dirty_log()

(1) is trivially safe.

(2) kvm_arch_sync_dirty_log() is non-trivially implemented for x86 and
s390. s390 does not select KVM_GENERIC_DIRTYLOG_READ_PROTECT, so the
optimization here does not apply to it. On x86, parallelization is safe.

The extra vCPU kicks that come from having more memslots should not be
an issue for the final dirty logging pass (the one I care about most
here), as vCPUs will have been kicked out to userspace at that point.
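
Also illustrative and not part of this patch: under manual protect, the
actual write-protection and TLB flush happen later, when userspace
clears the ranges it has processed via KVM_CLEAR_DIRTY_LOG (which still
runs under the slots lock). A hypothetical helper, where first_page must
be 64-page aligned and num_pages a multiple of 64 unless the range runs
to the end of the slot:

#include <linux/kvm.h>
#include <stdio.h>
#include <sys/ioctl.h>

/* Re-protect a range of pages whose dirty bits have been consumed. */
void clear_dirty_log(int vm_fd, __u32 slot, void *bitmap,
		     __u64 first_page, __u32 num_pages)
{
	struct kvm_clear_dirty_log clear = {
		.slot = slot,
		.first_page = first_page, /* bit 0 of bitmap == first_page */
		.num_pages = num_pages,
		.dirty_bitmap = bitmap,   /* set bits select pages to clear */
	};

	if (ioctl(vm_fd, KVM_CLEAR_DIRTY_LOG, &clear))
		perror("KVM_CLEAR_DIRTY_LOG");
}
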
$ ./dirty_log_perf_test -x 8 -b 512G -s anonymous_hugetlb_1gb # serial
Iteration 1 get dirty log time: 0.004699057s
Iteration 2 get dirty log time: 0.003918316s
Iteration 3 get dirty log time: 0.003903790s
Iteration 4 get dirty log time: 0.003944732s
Iteration 5 get dirty log time: 0.003885857s

$ ./dirty_log_perf_test -x 8 -b 512G -s anonymous_hugetlb_1gb # parallel
Iteration 1 get dirty log time: 0.002352174s
Iteration 2 get dirty log time: 0.001064265s
Iteration 3 get dirty log time: 0.001102144s
Iteration 4 get dirty log time: 0.000960649s
Iteration 5 get dirty log time: 0.000972533s

So with 8 memslots, we get about a 4x reduction on this platform
(Skylake).

Signed-off-by: James Houghton <jthoughton@...gle.com>
---
 include/linux/kvm_dirty_ring.h |  4 +-
 virt/kvm/dirty_ring.c          | 11 +++++-
 virt/kvm/kvm_main.c            | 68 +++++++++++++++++++++++-----------
 3 files changed, 58 insertions(+), 25 deletions(-)

diff --git a/include/linux/kvm_dirty_ring.h b/include/linux/kvm_dirty_ring.h
index eb10d87adf7d..b7d18abec4d0 100644
--- a/include/linux/kvm_dirty_ring.h
+++ b/include/linux/kvm_dirty_ring.h
@@ -37,7 +37,7 @@ static inline u32 kvm_dirty_ring_get_rsvd_entries(struct kvm *kvm)
return 0;
}
-static inline bool kvm_use_dirty_bitmap(struct kvm *kvm)
+static inline bool kvm_use_dirty_bitmap(struct kvm *kvm, bool shared)
{
return true;
}
@@ -73,7 +73,7 @@ static inline void kvm_dirty_ring_free(struct kvm_dirty_ring *ring)
#else /* CONFIG_HAVE_KVM_DIRTY_RING */
int kvm_cpu_dirty_log_size(struct kvm *kvm);
-bool kvm_use_dirty_bitmap(struct kvm *kvm);
+bool kvm_use_dirty_bitmap(struct kvm *kvm, bool shared);
bool kvm_arch_allow_write_without_running_vcpu(struct kvm *kvm);
u32 kvm_dirty_ring_get_rsvd_entries(struct kvm *kvm);
int kvm_dirty_ring_alloc(struct kvm *kvm, struct kvm_dirty_ring *ring,
diff --git a/virt/kvm/dirty_ring.c b/virt/kvm/dirty_ring.c
index 02bc6b00d76c..6662c302a3fa 100644
--- a/virt/kvm/dirty_ring.c
+++ b/virt/kvm/dirty_ring.c
@@ -21,9 +21,16 @@ u32 kvm_dirty_ring_get_rsvd_entries(struct kvm *kvm)
return KVM_DIRTY_RING_RSVD_ENTRIES + kvm_cpu_dirty_log_size(kvm);
}
-bool kvm_use_dirty_bitmap(struct kvm *kvm)
+bool kvm_use_dirty_bitmap(struct kvm *kvm, bool shared)
{
- lockdep_assert_held(&kvm->slots_lock);
+ if (shared)
+ /*
+ * In shared mode, racing with dirty log mode changes is
+ * tolerated.
+ */
+ lockdep_assert_held(&kvm->srcu);
+ else
+ lockdep_assert_held(&kvm->slots_lock);
return !kvm->dirty_ring_size || kvm->dirty_ring_with_bitmap;
}
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 18f29ef93543..bb1ec5556d76 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1687,7 +1687,7 @@ static int kvm_prepare_memory_region(struct kvm *kvm,
new->dirty_bitmap = NULL;
else if (old && old->dirty_bitmap)
new->dirty_bitmap = old->dirty_bitmap;
- else if (kvm_use_dirty_bitmap(kvm)) {
+ else if (kvm_use_dirty_bitmap(kvm, false)) {
r = kvm_alloc_dirty_bitmap(new);
if (r)
return r;
@@ -2162,7 +2162,7 @@ int kvm_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log,
unsigned long any = 0;
/* Dirty ring tracking may be exclusive to dirty log tracking */
- if (!kvm_use_dirty_bitmap(kvm))
+ if (!kvm_use_dirty_bitmap(kvm, false))
return -ENXIO;
*memslot = NULL;
@@ -2216,7 +2216,8 @@ EXPORT_SYMBOL_GPL(kvm_get_dirty_log);
* exiting to userspace will be logged for the next call.
*
*/
-static int kvm_get_dirty_log_protect(struct kvm *kvm, struct kvm_dirty_log *log)
+static int kvm_get_dirty_log_protect(struct kvm *kvm, struct kvm_dirty_log *log,
+ bool protect)
{
struct kvm_memslots *slots;
struct kvm_memory_slot *memslot;
@@ -2224,10 +2225,9 @@ static int kvm_get_dirty_log_protect(struct kvm *kvm, struct kvm_dirty_log *log)
unsigned long n;
unsigned long *dirty_bitmap;
unsigned long *dirty_bitmap_buffer;
- bool flush;
/* Dirty ring tracking may be exclusive to dirty log tracking */
- if (!kvm_use_dirty_bitmap(kvm))
+ if (!kvm_use_dirty_bitmap(kvm, !protect))
return -ENXIO;
as_id = log->slot >> 16;
@@ -2242,21 +2242,30 @@ static int kvm_get_dirty_log_protect(struct kvm *kvm, struct kvm_dirty_log *log)
dirty_bitmap = memslot->dirty_bitmap;
+ if (protect)
+ lockdep_assert_held(&kvm->slots_lock);
+ else
+ lockdep_assert_held(&kvm->srcu);
+ /*
+ * kvm_arch_sync_dirty_log() must be safe to call with either kvm->srcu
+ * held OR the slots lock held.
+ */
kvm_arch_sync_dirty_log(kvm, memslot);
n = kvm_dirty_bitmap_bytes(memslot);
- flush = false;
- if (kvm->manual_dirty_log_protect) {
+ if (!protect) {
/*
- * Unlike kvm_get_dirty_log, we always return false in *flush,
- * because no flush is needed until KVM_CLEAR_DIRTY_LOG. There
- * is some code duplication between this function and
- * kvm_get_dirty_log, but hopefully all architecture
- * transition to kvm_get_dirty_log_protect and kvm_get_dirty_log
- * can be eliminated.
+ * Unlike kvm_get_dirty_log, we never flush, because no flush is
+ * needed until KVM_CLEAR_DIRTY_LOG. There is some code
+ * duplication between this function and kvm_get_dirty_log, but
+ * hopefully all architectures transition to
+ * kvm_get_dirty_log_protect and kvm_get_dirty_log can be
+ * eliminated.
*/
dirty_bitmap_buffer = dirty_bitmap;
} else {
+ bool flush;
+
dirty_bitmap_buffer = kvm_second_dirty_bitmap(memslot);
memset(dirty_bitmap_buffer, 0, n);
@@ -2277,10 +2286,10 @@ static int kvm_get_dirty_log_protect(struct kvm *kvm, struct kvm_dirty_log *log)
offset, mask);
}
KVM_MMU_UNLOCK(kvm);
- }
- if (flush)
- kvm_flush_remote_tlbs_memslot(kvm, memslot);
+ if (flush)
+ kvm_flush_remote_tlbs_memslot(kvm, memslot);
+ }
if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n))
return -EFAULT;
@@ -2310,13 +2319,30 @@ static int kvm_get_dirty_log_protect(struct kvm *kvm, struct kvm_dirty_log *log)
static int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
struct kvm_dirty_log *log)
{
- int r;
+ bool protect;
+ int r, idx;
- mutex_lock(&kvm->slots_lock);
+ /*
+ * Only protect if manual protection isn't enabled.
+ */
+ protect = !kvm->manual_dirty_log_protect;
- r = kvm_get_dirty_log_protect(kvm, log);
+ /*
+ * If we are only collecting the dirty log and not clearing it,
+ * the srcu lock is sufficient.
+ */
+ if (protect)
+ mutex_lock(&kvm->slots_lock);
+ else
+ idx = srcu_read_lock(&kvm->srcu);
+
+ r = kvm_get_dirty_log_protect(kvm, log, protect);
+
+ if (protect)
+ mutex_unlock(&kvm->slots_lock);
+ else
+ srcu_read_unlock(&kvm->srcu, idx);
- mutex_unlock(&kvm->slots_lock);
return r;
}
@@ -2339,7 +2365,7 @@ static int kvm_clear_dirty_log_protect(struct kvm *kvm,
bool flush;
/* Dirty ring tracking may be exclusive to dirty log tracking */
- if (!kvm_use_dirty_bitmap(kvm))
+ if (!kvm_use_dirty_bitmap(kvm, false))
return -ENXIO;
as_id = log->slot >> 16;
base-commit: a6ad54137af92535cfe32e19e5f3bc1bb7dbd383
--
2.51.0.618.g983fd99d29-goog