[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <f8e59e80-33b2-47cd-a042-11f28cc61645@huawei.com>
Date: Wed, 24 Dec 2025 14:15:16 +0800
From: Tian Zheng <zhengtian10@...wei.com>
To: Robert Hoo <robert.hoo.linux@...il.com>, Tian Zheng
<zhengtian10@...wei.com>, <maz@...nel.org>, <oliver.upton@...ux.dev>,
<catalin.marinas@....com>, <corbet@....net>, <pbonzini@...hat.com>,
<will@...nel.org>
CC: <linux-kernel@...r.kernel.org>, <yuzenghui@...wei.com>,
<wangzhou1@...ilicon.com>, <yezhenyu2@...wei.com>, <xiexiangyou@...wei.com>,
<zhengchuan@...wei.com>, <joey.gouly@....com>, <kvmarm@...ts.linux.dev>,
<kvm@...r.kernel.org>, <linux-arm-kernel@...ts.infradead.org>,
<linux-doc@...r.kernel.org>, <suzuki.poulose@....com>
Subject: Re: [PATCH v2 4/5] KVM: arm64: Enable HDBSS support and handle HDBSSF
events
On 12/17/2025 9:39 PM, Robert Hoo wrote:
> On 11/21/2025 5:23 PM, Tian Zheng wrote:
>> From: eillon <yezhenyu2@...wei.com>
>>
>> Implement the HDBSS enable/disable functionality using the
>> KVM_CAP_ARM_HW_DIRTY_STATE_TRACK ioctl.
>>
>> Userspace (e.g., QEMU) can enable HDBSS by invoking the ioctl
>> at the start of live migration, configuring the buffer size.
>> The feature is disabled by invoking the ioctl again with size
>> set to 0 once migration completes.
>>
>> Add support for updating the dirty bitmap based on the HDBSS
>> buffer. Similar to the x86 PML implementation, KVM flushes the
>> buffer on all VM-Exits, so running vCPUs only need to be kicked
>> to force a VM-Exit.
>>
>> Signed-off-by: eillon <yezhenyu2@...wei.com>
>> Signed-off-by: Tian Zheng <zhengtian10@...wei.com>
>> ---
>> arch/arm64/include/asm/kvm_host.h | 10 +++
>> arch/arm64/include/asm/kvm_mmu.h | 17 +++++
>> arch/arm64/kvm/arm.c | 107 ++++++++++++++++++++++++++++++
>> arch/arm64/kvm/handle_exit.c | 45 +++++++++++++
>> arch/arm64/kvm/hyp/vhe/switch.c | 1 +
>> arch/arm64/kvm/mmu.c | 10 +++
>> arch/arm64/kvm/reset.c | 3 +
>> include/linux/kvm_host.h | 1 +
>> 8 files changed, 194 insertions(+)
>>
>> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/
>> asm/kvm_host.h
>> index d962932f0e5f..408e4c2b3d1a 100644
>> --- a/arch/arm64/include/asm/kvm_host.h
>> +++ b/arch/arm64/include/asm/kvm_host.h
>> @@ -87,6 +87,7 @@ int __init kvm_arm_init_sve(void);
>> u32 __attribute_const__ kvm_target_cpu(void);
>> void kvm_reset_vcpu(struct kvm_vcpu *vcpu);
>> void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu);
>> +void kvm_arm_vcpu_free_hdbss(struct kvm_vcpu *vcpu);
>>
>> struct kvm_hyp_memcache {
>> phys_addr_t head;
>> @@ -793,6 +794,12 @@ struct vcpu_reset_state {
>> bool reset;
>> };
>>
>> +struct vcpu_hdbss_state {
>> + phys_addr_t base_phys;
>> + u32 size;
>> + u32 next_index;
>> +};
>> +
>> struct vncr_tlb;
>>
>> struct kvm_vcpu_arch {
>> @@ -897,6 +904,9 @@ struct kvm_vcpu_arch {
>>
>> /* Per-vcpu TLB for VNCR_EL2 -- NULL when !NV */
>> struct vncr_tlb *vncr_tlb;
>> +
>> + /* HDBSS registers info */
>> + struct vcpu_hdbss_state hdbss;
>> };
>>
>> /*
>> diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/
>> asm/kvm_mmu.h
>> index e4069f2ce642..6ace1080aed5 100644
>> --- a/arch/arm64/include/asm/kvm_mmu.h
>> +++ b/arch/arm64/include/asm/kvm_mmu.h
>> @@ -331,6 +331,23 @@ static __always_inline void __load_stage2(struct
>> kvm_s2_mmu *mmu,
>> asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT));
>> }
>>
>> +static __always_inline void __load_hdbss(struct kvm_vcpu *vcpu)
>> +{
>> + struct kvm *kvm = vcpu->kvm;
>> + u64 br_el2, prod_el2;
>> +
>> + if (!kvm->enable_hdbss)
>> + return;
>> +
>> + br_el2 = HDBSSBR_EL2(vcpu->arch.hdbss.base_phys, vcpu-
>> >arch.hdbss.size);
>> + prod_el2 = vcpu->arch.hdbss.next_index;
>> +
>> + write_sysreg_s(br_el2, SYS_HDBSSBR_EL2);
>> + write_sysreg_s(prod_el2, SYS_HDBSSPROD_EL2);
>> +
>> + isb();
>> +}
>> +
>> static inline struct kvm *kvm_s2_mmu_to_kvm(struct kvm_s2_mmu *mmu)
>> {
>> return container_of(mmu->arch, struct kvm, arch);
>> diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
>> index 870953b4a8a7..64f65e3c2a89 100644
>> --- a/arch/arm64/kvm/arm.c
>> +++ b/arch/arm64/kvm/arm.c
>> @@ -79,6 +79,92 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
>> return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
>> }
>>
>> +void kvm_arm_vcpu_free_hdbss(struct kvm_vcpu *vcpu)
>> +{
>> + struct page *hdbss_pg = NULL;
>> +
>> + hdbss_pg = phys_to_page(vcpu->arch.hdbss.base_phys);
>> + if (hdbss_pg)
>> + __free_pages(hdbss_pg, vcpu->arch.hdbss.size);
>> +
>> + vcpu->arch.hdbss = (struct vcpu_hdbss_state) {
>> + .base_phys = 0,
>> + .size = 0,
>> + .next_index = 0,
>> + };
>> +}
>> +
>> +static int kvm_cap_arm_enable_hdbss(struct kvm *kvm,
>> + struct kvm_enable_cap *cap)
>> +{
>> + unsigned long i;
>> + struct kvm_vcpu *vcpu;
>> + struct page *hdbss_pg = NULL;
>> + int size = cap->args[0];
>> + int ret = 0;
>> +
>> + if (!system_supports_hdbss()) {
>> + kvm_err("This system does not support HDBSS!\n");
>> + return -EINVAL;
>> + }
>> +
>> + if (size < 0 || size > HDBSS_MAX_SIZE) {
>> + kvm_err("Invalid HDBSS buffer size: %d!\n", size);
>> + return -EINVAL;
>> + }
>> +
>
> I think you should check if it's already enabled here. What if user
> space calls this twice?
Ok, I reviewed the QEMU implementation: when the HDBSS feature is disabled
in ram_save_cleanup, size=0 is passed, so a check can be added here — if
(size && kvm->enable_hdbss), we will do nothing.
>
>> + /* Enable the HDBSS feature if size > 0, otherwise disable it. */
>> + if (size) {
>> + kvm_for_each_vcpu(i, vcpu, kvm) {
>> + hdbss_pg = alloc_pages(GFP_KERNEL_ACCOUNT, size);
>> + if (!hdbss_pg) {
>> + kvm_err("Alloc HDBSS buffer failed!\n");
>> + ret = -ENOMEM;
>> + goto error_alloc;
>> + }
>> +
>> + vcpu->arch.hdbss = (struct vcpu_hdbss_state) {
>> + .base_phys = page_to_phys(hdbss_pg),
>> + .size = size,
>> + .next_index = 0,
>> + };
>> + }
>> +
>> + kvm->enable_hdbss = true;
>> + kvm->arch.mmu.vtcr |= VTCR_EL2_HD | VTCR_EL2_HDBSS;
>
> VTCR_EL2_HA is also a necessity for VTCR_EL2_HDBSS to take effect.
I see, thanks! I checked the architecture spec, and it does state that HA
must be enabled for hardware management of the AF (and thus dirty-state
tracking) to function properly.
>
>> +
>> + /*
>> + * We should kick vcpus out of guest mode here to load new
>> + * vtcr value to vtcr_el2 register when re-enter guest mode.
>> + */
>> + kvm_for_each_vcpu(i, vcpu, kvm)
>> + kvm_vcpu_kick(vcpu);
>> + } else if (kvm->enable_hdbss) {
>> + kvm->arch.mmu.vtcr &= ~(VTCR_EL2_HD | VTCR_EL2_HDBSS);
>> +
>> + kvm_for_each_vcpu(i, vcpu, kvm) {
>> + /* Kick vcpus to flush hdbss buffer. */
>> + kvm_vcpu_kick(vcpu);
>> +
>> + kvm_arm_vcpu_free_hdbss(vcpu);
>> + }
>> +
>> + kvm->enable_hdbss = false;
>> + }
>> +
>> + return ret;
>> +
>> +error_alloc:
>> + kvm_for_each_vcpu(i, vcpu, kvm) {
>> + if (!vcpu->arch.hdbss.base_phys && !vcpu->arch.hdbss.size)
>> + continue;
>> +
>> + kvm_arm_vcpu_free_hdbss(vcpu);
>> + }
>> +
>> + return ret;
>> +}
>> +
>> int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
>> struct kvm_enable_cap *cap)
>> {
>> @@ -132,6 +218,11 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
>> }
>> mutex_unlock(&kvm->lock);
>> break;
>> + case KVM_CAP_ARM_HW_DIRTY_STATE_TRACK:
>> + mutex_lock(&kvm->lock);
>> + r = kvm_cap_arm_enable_hdbss(kvm, cap);
>> + mutex_unlock(&kvm->lock);
>> + break;
>> default:
>> break;
>> }
>> @@ -420,6 +511,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm,
>> long ext)
>> r = kvm_supports_cacheable_pfnmap();
>> break;
>>
>> + case KVM_CAP_ARM_HW_DIRTY_STATE_TRACK:
>> + r = system_supports_hdbss();
>> + break;
>> default:
>> r = 0;
>> }
>> @@ -1837,7 +1931,20 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
>>
>> void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot
>> *memslot)
>> {
>> + /*
>> + * Flush all CPUs' dirty log buffers to the dirty_bitmap. Called
>> + * before reporting dirty_bitmap to userspace. KVM flushes the
>> buffers
>> + * on all VM-Exits, thus we only need to kick running vCPUs to
>> force a
>> + * VM-Exit.
>> + */
>> + struct kvm_vcpu *vcpu;
>> + unsigned long i;
>>
>> + if (!kvm->enable_hdbss)
>> + return;
>> +
>> + kvm_for_each_vcpu(i, vcpu, kvm)
>> + kvm_vcpu_kick(vcpu);
>> }
>>
>> static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,
>> diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
>> index cc7d5d1709cb..9ba0ea6305ef 100644
>> --- a/arch/arm64/kvm/handle_exit.c
>> +++ b/arch/arm64/kvm/handle_exit.c
>> @@ -412,6 +412,49 @@ static exit_handle_fn kvm_get_exit_handler(struct
>> kvm_vcpu *vcpu)
>> return arm_exit_handlers[esr_ec];
>> }
>>
>> +static void kvm_flush_hdbss_buffer(struct kvm_vcpu *vcpu)
>> +{
>> + int idx, curr_idx;
>> + u64 *hdbss_buf;
>> + struct kvm *kvm = vcpu->kvm;
>> + u64 br_el2;
>> +
>> + if (!kvm->enable_hdbss)
>> + return;
>> +
>> + dsb(sy);
>> + isb();
>> + curr_idx = HDBSSPROD_IDX(read_sysreg_s(SYS_HDBSSPROD_EL2));
>> + br_el2 = HDBSSBR_EL2(vcpu->arch.hdbss.base_phys, vcpu-
>> >arch.hdbss.size);
>> +
>> + /* Do nothing if HDBSS buffer is empty or br_el2 is NULL */
>> + if (curr_idx == 0 || br_el2 == 0)
>> + return;
>> +
>> + hdbss_buf = page_address(phys_to_page(vcpu->arch.hdbss.base_phys));
>> + if (!hdbss_buf) {
>> + kvm_err("Enter flush hdbss buffer with buffer == NULL!");
>> + return;
>> + }
>> +
>> + guard(write_lock_irqsave)(&vcpu->kvm->mmu_lock);
>> + for (idx = 0; idx < curr_idx; idx++) {
>> + u64 gpa;
>> +
>> + gpa = hdbss_buf[idx];
>> + if (!(gpa & HDBSS_ENTRY_VALID))
>> + continue;
>> +
>> + gpa &= HDBSS_ENTRY_IPA;
>> + kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT);
>> + }
>> +
>> + /* reset HDBSS index */
>> + write_sysreg_s(0, SYS_HDBSSPROD_EL2);
>> + vcpu->arch.hdbss.next_index = 0;
>> + isb();
>> +}
>> +
>> /*
>> * We may be single-stepping an emulated instruction. If the emulation
>> * has been completed in the kernel, we can return to userspace with a
>> @@ -447,6 +490,8 @@ int handle_exit(struct kvm_vcpu *vcpu, int
>> exception_index)
>> {
>> struct kvm_run *run = vcpu->run;
>>
>> + kvm_flush_hdbss_buffer(vcpu);
>> +
>> if (ARM_SERROR_PENDING(exception_index)) {
>> /*
>> * The SError is handled by handle_exit_early(). If the guest
>> diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/
>> switch.c
>> index 9984c492305a..3787c9c5810d 100644
>> --- a/arch/arm64/kvm/hyp/vhe/switch.c
>> +++ b/arch/arm64/kvm/hyp/vhe/switch.c
>> @@ -220,6 +220,7 @@ void kvm_vcpu_load_vhe(struct kvm_vcpu *vcpu)
>> __vcpu_load_switch_sysregs(vcpu);
>> __vcpu_load_activate_traps(vcpu);
>> __load_stage2(vcpu->arch.hw_mmu, vcpu->arch.hw_mmu->arch);
>> + __load_hdbss(vcpu);
>> }
>>
>> void kvm_vcpu_put_vhe(struct kvm_vcpu *vcpu)
>> diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
>> index 7cc964af8d30..91a2f9dbb406 100644
>> --- a/arch/arm64/kvm/mmu.c
>> +++ b/arch/arm64/kvm/mmu.c
>> @@ -1843,6 +1843,9 @@ static int user_mem_abort(struct kvm_vcpu *vcpu,
>> phys_addr_t fault_ipa,
>> if (writable)
>> prot |= KVM_PGTABLE_PROT_W;
>>
>> + if (writable && kvm->enable_hdbss && logging_active)
>> + prot |= KVM_PGTABLE_PROT_DBM;
>> +
>> if (exec_fault)
>> prot |= KVM_PGTABLE_PROT_X;
>>
>> @@ -1950,6 +1953,13 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
>>
>> is_iabt = kvm_vcpu_trap_is_iabt(vcpu);
>>
>> + /*
>> + * HDBSS buffer already flushed when enter handle_trap_exceptions().
>> + * Nothing to do here.
>> + */
>> + if (ESR_ELx_ISS2(esr) & ESR_ELx_HDBSSF)
>> + return 1;
>> +
>> if (esr_fsc_is_translation_fault(esr)) {
>> /* Beyond sanitised PARange (which is the IPA limit) */
>> if (fault_ipa >= BIT_ULL(get_kvm_ipa_limit())) {
>> diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
>> index 959532422d3a..65e8f890f863 100644
>> --- a/arch/arm64/kvm/reset.c
>> +++ b/arch/arm64/kvm/reset.c
>> @@ -161,6 +161,9 @@ void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu)
>> free_page((unsigned long)vcpu->arch.ctxt.vncr_array);
>> kfree(vcpu->arch.vncr_tlb);
>> kfree(vcpu->arch.ccsidr);
>> +
>> + if (vcpu->arch.hdbss.base_phys || vcpu->arch.hdbss.size)
>> + kvm_arm_vcpu_free_hdbss(vcpu);
>> }
>>
>> static void kvm_vcpu_reset_sve(struct kvm_vcpu *vcpu)
>> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
>> index 5bd76cf394fa..aa8138604b1e 100644
>> --- a/include/linux/kvm_host.h
>> +++ b/include/linux/kvm_host.h
>> @@ -876,6 +876,7 @@ struct kvm {
>> struct xarray mem_attr_array;
>> #endif
>> char stats_id[KVM_STATS_NAME_SIZE];
>> + bool enable_hdbss;
>> };
>>
>> #define kvm_err(fmt, ...) \
>> --
>> 2.33.0
>>
>>
>
Powered by blists - more mailing lists