lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <Z-6o0zcLa4Aw0N6R@google.com>
Date: Thu, 3 Apr 2025 15:27:15 +0000
From: Quentin Perret <qperret@...gle.com>
To: Vincent Donnefort <vdonnefort@...gle.com>
Cc: maz@...nel.org, oliver.upton@...ux.dev, joey.gouly@....com,
	suzuki.poulose@....com, yuzenghui@...wei.com,
	catalin.marinas@....com, will@...nel.org,
	linux-arm-kernel@...ts.infradead.org, kvmarm@...ts.linux.dev,
	linux-kernel@...r.kernel.org, kernel-team@...roid.com
Subject: Re: [PATCH v2 2/9] KVM: arm64: Add a range to
 __pkvm_host_share_guest()

On Thursday 06 Mar 2025 at 11:00:31 (+0000), Vincent Donnefort wrote:
> +int __pkvm_host_share_guest(u64 pfn, u64 gfn, u64 nr_pages, struct pkvm_hyp_vcpu *vcpu,
>  			    enum kvm_pgtable_prot prot)
>  {
>  	struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
>  	u64 phys = hyp_pfn_to_phys(pfn);
>  	u64 ipa = hyp_pfn_to_phys(gfn);
> +	enum pkvm_page_state state;
>  	struct hyp_page *page;
> +	u64 size;
>  	int ret;
>  
>  	if (prot & ~KVM_PGTABLE_PROT_RWX)
>  		return -EINVAL;
>  
> -	ret = check_range_allowed_memory(phys, phys + PAGE_SIZE);
> +	ret = __guest_check_transition_size(phys, ipa, nr_pages, &size);
> +	if (ret)
> +		return ret;
> +
> +	ret = check_range_allowed_memory(phys, phys + size);
>  	if (ret)
>  		return ret;
>  
>  	host_lock_component();
>  	guest_lock_component(vm);
>  
> -	ret = __guest_check_page_state_range(vcpu, ipa, PAGE_SIZE, PKVM_NOPAGE);
> +	ret = __guest_check_page_state_range(vm, ipa, size, PKVM_NOPAGE);
>  	if (ret)
>  		goto unlock;
>  
> -	page = hyp_phys_to_page(phys);
> -	switch (page->host_state) {
> +	state = hyp_phys_to_page(phys)->host_state;
> +	for_each_hyp_page(phys, size, page) {
> +		if (page->host_state != state) {
> +			ret = -EPERM;
> +			goto unlock;
> +		}
> +	}
> +
> +	switch (state) {
>  	case PKVM_PAGE_OWNED:
> -		WARN_ON(__host_set_page_state_range(phys, PAGE_SIZE, PKVM_PAGE_SHARED_OWNED));
> +		WARN_ON(__host_set_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED));
>  		break;
>  	case PKVM_PAGE_SHARED_OWNED:
> -		if (page->host_share_guest_count)
> -			break;
> -		/* Only host to np-guest multi-sharing is tolerated */
> -		WARN_ON(1);
> -		fallthrough;
> +		for_each_hyp_page(phys, size, page) {
> +			/* Only host to np-guest multi-sharing is tolerated */
> +			if (WARN_ON(!page->host_share_guest_count)) {
> +				ret = -EPERM;
> +				goto unlock;
> +			}
> +		}
> +		break;
>  	default:
>  		ret = -EPERM;
>  		goto unlock;
>  	}
>  
> -	WARN_ON(kvm_pgtable_stage2_map(&vm->pgt, ipa, PAGE_SIZE, phys,
> +	WARN_ON(kvm_pgtable_stage2_map(&vm->pgt, ipa, size, phys,
>  				       pkvm_mkstate(prot, PKVM_PAGE_SHARED_BORROWED),
>  				       &vcpu->vcpu.arch.pkvm_memcache, 0));
> -	page->host_share_guest_count++;
> +	__host_update_share_guest_count(phys, size, true);

So we're walking the entire phys range 3 times;

	1. to check the host_state is consistent with that of the first
	page;

	2. to set the state to SHARED_OWNED or to check the
	host_share_guest_count;

	3. and then again here to update the host share guest count

I feel like we could probably remove at least one loop with a pattern
like so:

	for_each_hyp_page(phys, size, page) {
		switch (page->host_state) {
		case PKVM_PAGE_OWNED:
			continue;
		case PKVM_PAGE_SHARED_OWNED:
			if (page->host_share_guest_count)
				continue;
			fallthrough;
		default:
			ret = -EPERM;
			goto unlock;
		}
	}

	for_each_hyp_page(phys, size, page) {
		page->host_state = PKVM_PAGE_SHARED_OWNED;
		page->host_share_guest_count++;
	}

That would also tolerate a mix of OWNED and SHARED_OWNED pages in the
range, which I'm not sure is needed, but it doesn't cost us anything to
support so ... :-)

Wdyt?

>  unlock:
>  	guest_unlock_component(vm);
> diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c
> index 930b677eb9b0..00fd9a524bf7 100644
> --- a/arch/arm64/kvm/pkvm.c
> +++ b/arch/arm64/kvm/pkvm.c
> @@ -361,7 +361,7 @@ int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
>  		return -EINVAL;
>  
>  	lockdep_assert_held_write(&kvm->mmu_lock);
> -	ret = kvm_call_hyp_nvhe(__pkvm_host_share_guest, pfn, gfn, prot);
> +	ret = kvm_call_hyp_nvhe(__pkvm_host_share_guest, pfn, gfn, 1, prot);
>  	if (ret) {
>  		/* Is the gfn already mapped due to a racing vCPU? */
>  		if (ret == -EPERM)
> -- 
> 2.48.1.711.g2feabab25a-goog
> 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ