Message-ID: <f8e59e80-33b2-47cd-a042-11f28cc61645@huawei.com>
Date: Wed, 24 Dec 2025 14:15:16 +0800
From: Tian Zheng <zhengtian10@...wei.com>
To: Robert Hoo <robert.hoo.linux@...il.com>, Tian Zheng
	<zhengtian10@...wei.com>, <maz@...nel.org>, <oliver.upton@...ux.dev>,
	<catalin.marinas@....com>, <corbet@....net>, <pbonzini@...hat.com>,
	<will@...nel.org>
CC: <linux-kernel@...r.kernel.org>, <yuzenghui@...wei.com>,
	<wangzhou1@...ilicon.com>, <yezhenyu2@...wei.com>, <xiexiangyou@...wei.com>,
	<zhengchuan@...wei.com>, <joey.gouly@....com>, <kvmarm@...ts.linux.dev>,
	<kvm@...r.kernel.org>, <linux-arm-kernel@...ts.infradead.org>,
	<linux-doc@...r.kernel.org>, <suzuki.poulose@....com>
Subject: Re: [PATCH v2 4/5] KVM: arm64: Enable HDBSS support and handle HDBSSF
 events



On 12/17/2025 9:39 PM, Robert Hoo wrote:
> On 11/21/2025 5:23 PM, Tian Zheng wrote:
>> From: eillon <yezhenyu2@...wei.com>
>>
>> Implement the HDBSS enable/disable functionality using the
>> KVM_CAP_ARM_HW_DIRTY_STATE_TRACK ioctl.
>>
>> Userspace (e.g., QEMU) can enable HDBSS by invoking the ioctl
>> at the start of live migration, configuring the buffer size.
>> The feature is disabled by invoking the ioctl again with size
>> set to 0 once migration completes.
>>
>> Add support for updating the dirty bitmap based on the HDBSS
>> buffer. Similar to the x86 PML implementation, KVM flushes the
>> buffer on all VM-Exits, so running vCPUs only need to be kicked
>> to force a VM-Exit.
>>
>> Signed-off-by: eillon <yezhenyu2@...wei.com>
>> Signed-off-by: Tian Zheng <zhengtian10@...wei.com>
>> ---
>>   arch/arm64/include/asm/kvm_host.h |  10 +++
>>   arch/arm64/include/asm/kvm_mmu.h  |  17 +++++
>>   arch/arm64/kvm/arm.c              | 107 ++++++++++++++++++++++++++++++
>>   arch/arm64/kvm/handle_exit.c      |  45 +++++++++++++
>>   arch/arm64/kvm/hyp/vhe/switch.c   |   1 +
>>   arch/arm64/kvm/mmu.c              |  10 +++
>>   arch/arm64/kvm/reset.c            |   3 +
>>   include/linux/kvm_host.h          |   1 +
>>   8 files changed, 194 insertions(+)
>>
>> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
>> index d962932f0e5f..408e4c2b3d1a 100644
>> --- a/arch/arm64/include/asm/kvm_host.h
>> +++ b/arch/arm64/include/asm/kvm_host.h
>> @@ -87,6 +87,7 @@ int __init kvm_arm_init_sve(void);
>>   u32 __attribute_const__ kvm_target_cpu(void);
>>   void kvm_reset_vcpu(struct kvm_vcpu *vcpu);
>>   void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu);
>> +void kvm_arm_vcpu_free_hdbss(struct kvm_vcpu *vcpu);
>>
>>   struct kvm_hyp_memcache {
>>       phys_addr_t head;
>> @@ -793,6 +794,12 @@ struct vcpu_reset_state {
>>       bool        reset;
>>   };
>>
>> +struct vcpu_hdbss_state {
>> +    phys_addr_t base_phys;
>> +    u32 size;
>> +    u32 next_index;
>> +};
>> +
>>   struct vncr_tlb;
>>
>>   struct kvm_vcpu_arch {
>> @@ -897,6 +904,9 @@ struct kvm_vcpu_arch {
>>
>>       /* Per-vcpu TLB for VNCR_EL2 -- NULL when !NV */
>>       struct vncr_tlb    *vncr_tlb;
>> +
>> +    /* HDBSS registers info */
>> +    struct vcpu_hdbss_state hdbss;
>>   };
>>
>>   /*
>> diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
>> index e4069f2ce642..6ace1080aed5 100644
>> --- a/arch/arm64/include/asm/kvm_mmu.h
>> +++ b/arch/arm64/include/asm/kvm_mmu.h
>> @@ -331,6 +331,23 @@ static __always_inline void __load_stage2(struct kvm_s2_mmu *mmu,
>>       asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT));
>>   }
>>
>> +static __always_inline void __load_hdbss(struct kvm_vcpu *vcpu)
>> +{
>> +    struct kvm *kvm = vcpu->kvm;
>> +    u64 br_el2, prod_el2;
>> +
>> +    if (!kvm->enable_hdbss)
>> +        return;
>> +
>> +    br_el2 = HDBSSBR_EL2(vcpu->arch.hdbss.base_phys, vcpu->arch.hdbss.size);
>> +    prod_el2 = vcpu->arch.hdbss.next_index;
>> +
>> +    write_sysreg_s(br_el2, SYS_HDBSSBR_EL2);
>> +    write_sysreg_s(prod_el2, SYS_HDBSSPROD_EL2);
>> +
>> +    isb();
>> +}
>> +
>>   static inline struct kvm *kvm_s2_mmu_to_kvm(struct kvm_s2_mmu *mmu)
>>   {
>>       return container_of(mmu->arch, struct kvm, arch);
>> diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
>> index 870953b4a8a7..64f65e3c2a89 100644
>> --- a/arch/arm64/kvm/arm.c
>> +++ b/arch/arm64/kvm/arm.c
>> @@ -79,6 +79,92 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
>>       return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
>>   }
>>
>> +void kvm_arm_vcpu_free_hdbss(struct kvm_vcpu *vcpu)
>> +{
>> +    struct page *hdbss_pg = NULL;
>> +
>> +    hdbss_pg = phys_to_page(vcpu->arch.hdbss.base_phys);
>> +    if (hdbss_pg)
>> +        __free_pages(hdbss_pg, vcpu->arch.hdbss.size);
>> +
>> +    vcpu->arch.hdbss = (struct vcpu_hdbss_state) {
>> +        .base_phys = 0,
>> +        .size = 0,
>> +        .next_index = 0,
>> +    };
>> +}
>> +
>> +static int kvm_cap_arm_enable_hdbss(struct kvm *kvm,
>> +                    struct kvm_enable_cap *cap)
>> +{
>> +    unsigned long i;
>> +    struct kvm_vcpu *vcpu;
>> +    struct page *hdbss_pg = NULL;
>> +    int size = cap->args[0];
>> +    int ret = 0;
>> +
>> +    if (!system_supports_hdbss()) {
>> +        kvm_err("This system does not support HDBSS!\n");
>> +        return -EINVAL;
>> +    }
>> +
>> +    if (size < 0 || size > HDBSS_MAX_SIZE) {
>> +        kvm_err("Invalid HDBSS buffer size: %d!\n", size);
>> +        return -EINVAL;
>> +    }
>> +
> 
> I think you should check if it's already enabled here. What if user 
> space calls this twice?

OK. I reviewed the QEMU implementation: when QEMU disables the HDBSS
feature in ram_save_cleanup(), it passes size=0. So a check can be added
here: if (size && kvm->enable_hdbss), we will do nothing.
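
Something like this, placed before the size check and allocation loop in
kvm_cap_arm_enable_hdbss() (just a sketch; whether to return 0 or -EBUSY
on a repeated enable is an open design choice):

	/* Already enabled: ignore a repeated enable request. */
	if (size && kvm->enable_hdbss)
		return 0;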

> 
>> +    /* Enable the HDBSS feature if size > 0, otherwise disable it. */
>> +    if (size) {
>> +        kvm_for_each_vcpu(i, vcpu, kvm) {
>> +            hdbss_pg = alloc_pages(GFP_KERNEL_ACCOUNT, size);
>> +            if (!hdbss_pg) {
>> +                kvm_err("Alloc HDBSS buffer failed!\n");
>> +                ret = -ENOMEM;
>> +                goto error_alloc;
>> +            }
>> +
>> +            vcpu->arch.hdbss = (struct vcpu_hdbss_state) {
>> +                .base_phys = page_to_phys(hdbss_pg),
>> +                .size = size,
>> +                .next_index = 0,
>> +            };
>> +        }
>> +
>> +        kvm->enable_hdbss = true;
>> +        kvm->arch.mmu.vtcr |= VTCR_EL2_HD | VTCR_EL2_HDBSS;
> 
> VTCR_EL2_HA is also required for VTCR_EL2_HDBSS to take effect.

I see, thanks! I checked the architecture spec, and it indeed states
that HA needs to be enabled for AF hardware management to function
properly.
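
So the enable path would presumably become something like this (a sketch
on top of this patch; VTCR_EL2_HA is the existing stage-2 hardware
Access-Flag update bit):

	kvm->arch.mmu.vtcr |= VTCR_EL2_HA | VTCR_EL2_HD | VTCR_EL2_HDBSS;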

> 
>> +
>> +        /*
>> +         * We should kick vcpus out of guest mode here so that the new
>> +         * vtcr value is loaded into VTCR_EL2 on guest re-entry.
>> +         */
>> +        kvm_for_each_vcpu(i, vcpu, kvm)
>> +            kvm_vcpu_kick(vcpu);
>> +    } else if (kvm->enable_hdbss) {
>> +        kvm->arch.mmu.vtcr &= ~(VTCR_EL2_HD | VTCR_EL2_HDBSS);
>> +
>> +        kvm_for_each_vcpu(i, vcpu, kvm) {
>> +            /* Kick vcpus to flush hdbss buffer. */
>> +            kvm_vcpu_kick(vcpu);
>> +
>> +            kvm_arm_vcpu_free_hdbss(vcpu);
>> +        }
>> +
>> +        kvm->enable_hdbss = false;
>> +    }
>> +
>> +    return ret;
>> +
>> +error_alloc:
>> +    kvm_for_each_vcpu(i, vcpu, kvm) {
>> +        if (!vcpu->arch.hdbss.base_phys && !vcpu->arch.hdbss.size)
>> +            continue;
>> +
>> +        kvm_arm_vcpu_free_hdbss(vcpu);
>> +    }
>> +
>> +    return ret;
>> +}
>> +
>>   int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
>>                   struct kvm_enable_cap *cap)
>>   {
>> @@ -132,6 +218,11 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
>>           }
>>           mutex_unlock(&kvm->lock);
>>           break;
>> +    case KVM_CAP_ARM_HW_DIRTY_STATE_TRACK:
>> +        mutex_lock(&kvm->lock);
>> +        r = kvm_cap_arm_enable_hdbss(kvm, cap);
>> +        mutex_unlock(&kvm->lock);
>> +        break;
>>       default:
>>           break;
>>       }
>> @@ -420,6 +511,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
>>               r = kvm_supports_cacheable_pfnmap();
>>           break;
>>
>> +    case KVM_CAP_ARM_HW_DIRTY_STATE_TRACK:
>> +        r = system_supports_hdbss();
>> +        break;
>>       default:
>>           r = 0;
>>       }
>> @@ -1837,7 +1931,20 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
>>
>>   void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
>>   {
>> +    /*
>> +     * Flush all CPUs' dirty log buffers to the dirty_bitmap.  Called
>> +     * before reporting dirty_bitmap to userspace.  KVM flushes the buffers
>> +     * on all VM-Exits, thus we only need to kick running vCPUs to force a
>> +     * VM-Exit.
>> +     */
>> +    struct kvm_vcpu *vcpu;
>> +    unsigned long i;
>>
>> +    if (!kvm->enable_hdbss)
>> +        return;
>> +
>> +    kvm_for_each_vcpu(i, vcpu, kvm)
>> +        kvm_vcpu_kick(vcpu);
>>   }
>>
>>   static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,
>> diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
>> index cc7d5d1709cb..9ba0ea6305ef 100644
>> --- a/arch/arm64/kvm/handle_exit.c
>> +++ b/arch/arm64/kvm/handle_exit.c
>> @@ -412,6 +412,49 @@ static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu)
>>       return arm_exit_handlers[esr_ec];
>>   }
>>
>> +static void kvm_flush_hdbss_buffer(struct kvm_vcpu *vcpu)
>> +{
>> +    int idx, curr_idx;
>> +    u64 *hdbss_buf;
>> +    struct kvm *kvm = vcpu->kvm;
>> +    u64 br_el2;
>> +
>> +    if (!kvm->enable_hdbss)
>> +        return;
>> +
>> +    dsb(sy);
>> +    isb();
>> +    curr_idx = HDBSSPROD_IDX(read_sysreg_s(SYS_HDBSSPROD_EL2));
>> +    br_el2 = HDBSSBR_EL2(vcpu->arch.hdbss.base_phys, vcpu->arch.hdbss.size);
>> +
>> +    /* Do nothing if HDBSS buffer is empty or br_el2 is NULL */
>> +    if (curr_idx == 0 || br_el2 == 0)
>> +        return;
>> +
>> +    hdbss_buf = page_address(phys_to_page(vcpu->arch.hdbss.base_phys));
>> +    if (!hdbss_buf) {
>> +        kvm_err("Enter flush hdbss buffer with buffer == NULL!");
>> +        return;
>> +    }
>> +
>> +    guard(write_lock_irqsave)(&vcpu->kvm->mmu_lock);
>> +    for (idx = 0; idx < curr_idx; idx++) {
>> +        u64 gpa;
>> +
>> +        gpa = hdbss_buf[idx];
>> +        if (!(gpa & HDBSS_ENTRY_VALID))
>> +            continue;
>> +
>> +        gpa &= HDBSS_ENTRY_IPA;
>> +        kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT);
>> +    }
>> +
>> +    /* reset HDBSS index */
>> +    write_sysreg_s(0, SYS_HDBSSPROD_EL2);
>> +    vcpu->arch.hdbss.next_index = 0;
>> +    isb();
>> +}
>> +
>>   /*
>>    * We may be single-stepping an emulated instruction. If the emulation
>>    * has been completed in the kernel, we can return to userspace with a
>> @@ -447,6 +490,8 @@ int handle_exit(struct kvm_vcpu *vcpu, int exception_index)
>>   {
>>       struct kvm_run *run = vcpu->run;
>>
>> +    kvm_flush_hdbss_buffer(vcpu);
>> +
>>       if (ARM_SERROR_PENDING(exception_index)) {
>>           /*
>>            * The SError is handled by handle_exit_early(). If the guest
>> diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
>> index 9984c492305a..3787c9c5810d 100644
>> --- a/arch/arm64/kvm/hyp/vhe/switch.c
>> +++ b/arch/arm64/kvm/hyp/vhe/switch.c
>> @@ -220,6 +220,7 @@ void kvm_vcpu_load_vhe(struct kvm_vcpu *vcpu)
>>       __vcpu_load_switch_sysregs(vcpu);
>>       __vcpu_load_activate_traps(vcpu);
>>       __load_stage2(vcpu->arch.hw_mmu, vcpu->arch.hw_mmu->arch);
>> +    __load_hdbss(vcpu);
>>   }
>>
>>   void kvm_vcpu_put_vhe(struct kvm_vcpu *vcpu)
>> diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
>> index 7cc964af8d30..91a2f9dbb406 100644
>> --- a/arch/arm64/kvm/mmu.c
>> +++ b/arch/arm64/kvm/mmu.c
>> @@ -1843,6 +1843,9 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
>>       if (writable)
>>           prot |= KVM_PGTABLE_PROT_W;
>>
>> +    if (writable && kvm->enable_hdbss && logging_active)
>> +        prot |= KVM_PGTABLE_PROT_DBM;
>> +
>>       if (exec_fault)
>>           prot |= KVM_PGTABLE_PROT_X;
>>
>> @@ -1950,6 +1953,13 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
>>
>>       is_iabt = kvm_vcpu_trap_is_iabt(vcpu);
>>
>> +    /*
>> +     * The HDBSS buffer was already flushed when entering handle_trap_exceptions().
>> +     * Nothing to do here.
>> +     */
>> +    if (ESR_ELx_ISS2(esr) & ESR_ELx_HDBSSF)
>> +        return 1;
>> +
>>       if (esr_fsc_is_translation_fault(esr)) {
>>           /* Beyond sanitised PARange (which is the IPA limit) */
>>           if (fault_ipa >= BIT_ULL(get_kvm_ipa_limit())) {
>> diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
>> index 959532422d3a..65e8f890f863 100644
>> --- a/arch/arm64/kvm/reset.c
>> +++ b/arch/arm64/kvm/reset.c
>> @@ -161,6 +161,9 @@ void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu)
>>       free_page((unsigned long)vcpu->arch.ctxt.vncr_array);
>>       kfree(vcpu->arch.vncr_tlb);
>>       kfree(vcpu->arch.ccsidr);
>> +
>> +    if (vcpu->arch.hdbss.base_phys || vcpu->arch.hdbss.size)
>> +        kvm_arm_vcpu_free_hdbss(vcpu);
>>   }
>>
>>   static void kvm_vcpu_reset_sve(struct kvm_vcpu *vcpu)
>> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
>> index 5bd76cf394fa..aa8138604b1e 100644
>> --- a/include/linux/kvm_host.h
>> +++ b/include/linux/kvm_host.h
>> @@ -876,6 +876,7 @@ struct kvm {
>>       struct xarray mem_attr_array;
>>   #endif
>>       char stats_id[KVM_STATS_NAME_SIZE];
>> +    bool enable_hdbss;
>>   };
>>
>>   #define kvm_err(fmt, ...) \
>> -- 
>> 2.33.0
>>
>>
> 
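
For reference, here is a minimal sketch of the userspace side described in
the commit message above. The helper name is made up, and
KVM_CAP_ARM_HW_DIRTY_STATE_TRACK assumes the uapi definition this series
adds; QEMU's actual live-migration integration will look different:

	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	/*
	 * Enable HDBSS at the start of live migration with args[0] = size
	 * (interpreted as the per-vCPU buffer allocation order in this
	 * patch), and disable it again with size = 0 once migration
	 * completes.
	 */
	static int kvm_set_hdbss(int vm_fd, unsigned long size)
	{
		struct kvm_enable_cap cap = {
			.cap = KVM_CAP_ARM_HW_DIRTY_STATE_TRACK,
			.args = { size },
		};

		return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
	}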

