lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <87df4cba-b191-49cf-9486-fc379470a6eb@gmail.com>
Date: Wed, 17 Dec 2025 21:39:45 +0800
From: Robert Hoo <robert.hoo.linux@...il.com>
To: Tian Zheng <zhengtian10@...wei.com>, maz@...nel.org,
 oliver.upton@...ux.dev, catalin.marinas@....com, corbet@....net,
 pbonzini@...hat.com, will@...nel.org
Cc: linux-kernel@...r.kernel.org, yuzenghui@...wei.com,
 wangzhou1@...ilicon.com, yezhenyu2@...wei.com, xiexiangyou@...wei.com,
 zhengchuan@...wei.com, linuxarm@...wei.com, joey.gouly@....com,
 kvmarm@...ts.linux.dev, kvm@...r.kernel.org,
 linux-arm-kernel@...ts.infradead.org, linux-doc@...r.kernel.org,
 suzuki.poulose@....com
Subject: Re: [PATCH v2 4/5] KVM: arm64: Enable HDBSS support and handle HDBSSF
 events

On 11/21/2025 5:23 PM, Tian Zheng wrote:
> From: eillon <yezhenyu2@...wei.com>
> 
> Implement the HDBSS enable/disable functionality using the
> KVM_CAP_ARM_HW_DIRTY_STATE_TRACK ioctl.
> 
> Userspace (e.g., QEMU) can enable HDBSS by invoking the ioctl
> at the start of live migration, configuring the buffer size.
> The feature is disabled by invoking the ioctl again with size
> set to 0 once migration completes.
> 
> Add support for updating the dirty bitmap based on the HDBSS
> buffer. Similar to the x86 PML implementation, KVM flushes the
> buffer on all VM-Exits, so running vCPUs only need to be kicked
> to force a VM-Exit.
> 
> Signed-off-by: eillon <yezhenyu2@...wei.com>
> Signed-off-by: Tian Zheng <zhengtian10@...wei.com>
> ---
>   arch/arm64/include/asm/kvm_host.h |  10 +++
>   arch/arm64/include/asm/kvm_mmu.h  |  17 +++++
>   arch/arm64/kvm/arm.c              | 107 ++++++++++++++++++++++++++++++
>   arch/arm64/kvm/handle_exit.c      |  45 +++++++++++++
>   arch/arm64/kvm/hyp/vhe/switch.c   |   1 +
>   arch/arm64/kvm/mmu.c              |  10 +++
>   arch/arm64/kvm/reset.c            |   3 +
>   include/linux/kvm_host.h          |   1 +
>   8 files changed, 194 insertions(+)
> 
> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> index d962932f0e5f..408e4c2b3d1a 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -87,6 +87,7 @@ int __init kvm_arm_init_sve(void);
>   u32 __attribute_const__ kvm_target_cpu(void);
>   void kvm_reset_vcpu(struct kvm_vcpu *vcpu);
>   void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu);
> +void kvm_arm_vcpu_free_hdbss(struct kvm_vcpu *vcpu);
> 
>   struct kvm_hyp_memcache {
>   	phys_addr_t head;
> @@ -793,6 +794,12 @@ struct vcpu_reset_state {
>   	bool		reset;
>   };
> 
> +struct vcpu_hdbss_state {
> +	phys_addr_t base_phys;
> +	u32 size;
> +	u32 next_index;
> +};
> +
>   struct vncr_tlb;
> 
>   struct kvm_vcpu_arch {
> @@ -897,6 +904,9 @@ struct kvm_vcpu_arch {
> 
>   	/* Per-vcpu TLB for VNCR_EL2 -- NULL when !NV */
>   	struct vncr_tlb	*vncr_tlb;
> +
> +	/* HDBSS registers info */
> +	struct vcpu_hdbss_state hdbss;
>   };
> 
>   /*
> diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
> index e4069f2ce642..6ace1080aed5 100644
> --- a/arch/arm64/include/asm/kvm_mmu.h
> +++ b/arch/arm64/include/asm/kvm_mmu.h
> @@ -331,6 +331,23 @@ static __always_inline void __load_stage2(struct kvm_s2_mmu *mmu,
>   	asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT));
>   }
> 
> +static __always_inline void __load_hdbss(struct kvm_vcpu *vcpu)
> +{
> +	struct kvm *kvm = vcpu->kvm;
> +	u64 br_el2, prod_el2;
> +
> +	if (!kvm->enable_hdbss)
> +		return;
> +
> +	br_el2 = HDBSSBR_EL2(vcpu->arch.hdbss.base_phys, vcpu->arch.hdbss.size);
> +	prod_el2 = vcpu->arch.hdbss.next_index;
> +
> +	write_sysreg_s(br_el2, SYS_HDBSSBR_EL2);
> +	write_sysreg_s(prod_el2, SYS_HDBSSPROD_EL2);
> +
> +	isb();
> +}
> +
>   static inline struct kvm *kvm_s2_mmu_to_kvm(struct kvm_s2_mmu *mmu)
>   {
>   	return container_of(mmu->arch, struct kvm, arch);
> diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
> index 870953b4a8a7..64f65e3c2a89 100644
> --- a/arch/arm64/kvm/arm.c
> +++ b/arch/arm64/kvm/arm.c
> @@ -79,6 +79,92 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
>   	return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
>   }
> 
> +void kvm_arm_vcpu_free_hdbss(struct kvm_vcpu *vcpu)
> +{
> +	struct page *hdbss_pg = NULL;
> +
> +	hdbss_pg = phys_to_page(vcpu->arch.hdbss.base_phys);
> +	if (hdbss_pg)
> +		__free_pages(hdbss_pg, vcpu->arch.hdbss.size);
> +
> +	vcpu->arch.hdbss = (struct vcpu_hdbss_state) {
> +		.base_phys = 0,
> +		.size = 0,
> +		.next_index = 0,
> +	};
> +}
> +
> +static int kvm_cap_arm_enable_hdbss(struct kvm *kvm,
> +				    struct kvm_enable_cap *cap)
> +{
> +	unsigned long i;
> +	struct kvm_vcpu *vcpu;
> +	struct page *hdbss_pg = NULL;
> +	int size = cap->args[0];
> +	int ret = 0;
> +
> +	if (!system_supports_hdbss()) {
> +		kvm_err("This system does not support HDBSS!\n");
> +		return -EINVAL;
> +	}
> +
> +	if (size < 0 || size > HDBSS_MAX_SIZE) {
> +		kvm_err("Invalid HDBSS buffer size: %d!\n", size);
> +		return -EINVAL;
> +	}
> +

I think you should check here whether HDBSS is already enabled. What happens if 
userspace calls this ioctl twice with a non-zero size?

> +	/* Enable the HDBSS feature if size > 0, otherwise disable it. */
> +	if (size) {
> +		kvm_for_each_vcpu(i, vcpu, kvm) {
> +			hdbss_pg = alloc_pages(GFP_KERNEL_ACCOUNT, size);
> +			if (!hdbss_pg) {
> +				kvm_err("Alloc HDBSS buffer failed!\n");
> +				ret = -ENOMEM;
> +				goto error_alloc;
> +			}
> +
> +			vcpu->arch.hdbss = (struct vcpu_hdbss_state) {
> +				.base_phys = page_to_phys(hdbss_pg),
> +				.size = size,
> +				.next_index = 0,
> +			};
> +		}
> +
> +		kvm->enable_hdbss = true;
> +		kvm->arch.mmu.vtcr |= VTCR_EL2_HD | VTCR_EL2_HDBSS;

VTCR_EL2_HA is also required for VTCR_EL2_HDBSS to take effect.

> +
> +		/*
> +		 * We should kick vcpus out of guest mode here to load new
> +		 * vtcr value to vtcr_el2 register when re-enter guest mode.
> +		 */
> +		kvm_for_each_vcpu(i, vcpu, kvm)
> +			kvm_vcpu_kick(vcpu);
> +	} else if (kvm->enable_hdbss) {
> +		kvm->arch.mmu.vtcr &= ~(VTCR_EL2_HD | VTCR_EL2_HDBSS);
> +
> +		kvm_for_each_vcpu(i, vcpu, kvm) {
> +			/* Kick vcpus to flush hdbss buffer. */
> +			kvm_vcpu_kick(vcpu);
> +
> +			kvm_arm_vcpu_free_hdbss(vcpu);
> +		}
> +
> +		kvm->enable_hdbss = false;
> +	}
> +
> +	return ret;
> +
> +error_alloc:
> +	kvm_for_each_vcpu(i, vcpu, kvm) {
> +		if (!vcpu->arch.hdbss.base_phys && !vcpu->arch.hdbss.size)
> +			continue;
> +
> +		kvm_arm_vcpu_free_hdbss(vcpu);
> +	}
> +
> +	return ret;
> +}
> +
>   int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
>   			    struct kvm_enable_cap *cap)
>   {
> @@ -132,6 +218,11 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
>   		}
>   		mutex_unlock(&kvm->lock);
>   		break;
> +	case KVM_CAP_ARM_HW_DIRTY_STATE_TRACK:
> +		mutex_lock(&kvm->lock);
> +		r = kvm_cap_arm_enable_hdbss(kvm, cap);
> +		mutex_unlock(&kvm->lock);
> +		break;
>   	default:
>   		break;
>   	}
> @@ -420,6 +511,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
>   			r = kvm_supports_cacheable_pfnmap();
>   		break;
> 
> +	case KVM_CAP_ARM_HW_DIRTY_STATE_TRACK:
> +		r = system_supports_hdbss();
> +		break;
>   	default:
>   		r = 0;
>   	}
> @@ -1837,7 +1931,20 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
> 
>   void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
>   {
> +	/*
> +	 * Flush all CPUs' dirty log buffers to the dirty_bitmap.  Called
> +	 * before reporting dirty_bitmap to userspace.  KVM flushes the buffers
> +	 * on all VM-Exits, thus we only need to kick running vCPUs to force a
> +	 * VM-Exit.
> +	 */
> +	struct kvm_vcpu *vcpu;
> +	unsigned long i;
> 
> +	if (!kvm->enable_hdbss)
> +		return;
> +
> +	kvm_for_each_vcpu(i, vcpu, kvm)
> +		kvm_vcpu_kick(vcpu);
>   }
> 
>   static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,
> diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
> index cc7d5d1709cb..9ba0ea6305ef 100644
> --- a/arch/arm64/kvm/handle_exit.c
> +++ b/arch/arm64/kvm/handle_exit.c
> @@ -412,6 +412,49 @@ static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu)
>   	return arm_exit_handlers[esr_ec];
>   }
> 
> +static void kvm_flush_hdbss_buffer(struct kvm_vcpu *vcpu)
> +{
> +	int idx, curr_idx;
> +	u64 *hdbss_buf;
> +	struct kvm *kvm = vcpu->kvm;
> +	u64 br_el2;
> +
> +	if (!kvm->enable_hdbss)
> +		return;
> +
> +	dsb(sy);
> +	isb();
> +	curr_idx = HDBSSPROD_IDX(read_sysreg_s(SYS_HDBSSPROD_EL2));
> +	br_el2 = HDBSSBR_EL2(vcpu->arch.hdbss.base_phys, vcpu->arch.hdbss.size);
> +
> +	/* Do nothing if HDBSS buffer is empty or br_el2 is NULL */
> +	if (curr_idx == 0 || br_el2 == 0)
> +		return;
> +
> +	hdbss_buf = page_address(phys_to_page(vcpu->arch.hdbss.base_phys));
> +	if (!hdbss_buf) {
> +		kvm_err("Enter flush hdbss buffer with buffer == NULL!");
> +		return;
> +	}
> +
> +	guard(write_lock_irqsave)(&vcpu->kvm->mmu_lock);
> +	for (idx = 0; idx < curr_idx; idx++) {
> +		u64 gpa;
> +
> +		gpa = hdbss_buf[idx];
> +		if (!(gpa & HDBSS_ENTRY_VALID))
> +			continue;
> +
> +		gpa &= HDBSS_ENTRY_IPA;
> +		kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT);
> +	}
> +
> +	/* reset HDBSS index */
> +	write_sysreg_s(0, SYS_HDBSSPROD_EL2);
> +	vcpu->arch.hdbss.next_index = 0;
> +	isb();
> +}
> +
>   /*
>    * We may be single-stepping an emulated instruction. If the emulation
>    * has been completed in the kernel, we can return to userspace with a
> @@ -447,6 +490,8 @@ int handle_exit(struct kvm_vcpu *vcpu, int exception_index)
>   {
>   	struct kvm_run *run = vcpu->run;
> 
> +	kvm_flush_hdbss_buffer(vcpu);
> +
>   	if (ARM_SERROR_PENDING(exception_index)) {
>   		/*
>   		 * The SError is handled by handle_exit_early(). If the guest
> diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
> index 9984c492305a..3787c9c5810d 100644
> --- a/arch/arm64/kvm/hyp/vhe/switch.c
> +++ b/arch/arm64/kvm/hyp/vhe/switch.c
> @@ -220,6 +220,7 @@ void kvm_vcpu_load_vhe(struct kvm_vcpu *vcpu)
>   	__vcpu_load_switch_sysregs(vcpu);
>   	__vcpu_load_activate_traps(vcpu);
>   	__load_stage2(vcpu->arch.hw_mmu, vcpu->arch.hw_mmu->arch);
> +	__load_hdbss(vcpu);
>   }
> 
>   void kvm_vcpu_put_vhe(struct kvm_vcpu *vcpu)
> diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
> index 7cc964af8d30..91a2f9dbb406 100644
> --- a/arch/arm64/kvm/mmu.c
> +++ b/arch/arm64/kvm/mmu.c
> @@ -1843,6 +1843,9 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
>   	if (writable)
>   		prot |= KVM_PGTABLE_PROT_W;
> 
> +	if (writable && kvm->enable_hdbss && logging_active)
> +		prot |= KVM_PGTABLE_PROT_DBM;
> +
>   	if (exec_fault)
>   		prot |= KVM_PGTABLE_PROT_X;
> 
> @@ -1950,6 +1953,13 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
> 
>   	is_iabt = kvm_vcpu_trap_is_iabt(vcpu);
> 
> +	/*
> +	 * HDBSS buffer already flushed when enter handle_trap_exceptions().
> +	 * Nothing to do here.
> +	 */
> +	if (ESR_ELx_ISS2(esr) & ESR_ELx_HDBSSF)
> +		return 1;
> +
>   	if (esr_fsc_is_translation_fault(esr)) {
>   		/* Beyond sanitised PARange (which is the IPA limit) */
>   		if (fault_ipa >= BIT_ULL(get_kvm_ipa_limit())) {
> diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
> index 959532422d3a..65e8f890f863 100644
> --- a/arch/arm64/kvm/reset.c
> +++ b/arch/arm64/kvm/reset.c
> @@ -161,6 +161,9 @@ void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu)
>   	free_page((unsigned long)vcpu->arch.ctxt.vncr_array);
>   	kfree(vcpu->arch.vncr_tlb);
>   	kfree(vcpu->arch.ccsidr);
> +
> +	if (vcpu->arch.hdbss.base_phys || vcpu->arch.hdbss.size)
> +		kvm_arm_vcpu_free_hdbss(vcpu);
>   }
> 
>   static void kvm_vcpu_reset_sve(struct kvm_vcpu *vcpu)
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 5bd76cf394fa..aa8138604b1e 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -876,6 +876,7 @@ struct kvm {
>   	struct xarray mem_attr_array;
>   #endif
>   	char stats_id[KVM_STATS_NAME_SIZE];
> +	bool enable_hdbss;
>   };
> 
>   #define kvm_err(fmt, ...) \
> --
> 2.33.0
> 
> 


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ