Message-ID: <86bju3onl3.wl-maz@kernel.org>
Date: Fri, 14 Mar 2025 11:31:36 +0000
From: Marc Zyngier <maz@...nel.org>
To: Quentin Perret <qperret@...gle.com>
Cc: Oliver Upton <oliver.upton@...ux.dev>,
	Joey Gouly <joey.gouly@....com>,
	Suzuki K Poulose <suzuki.poulose@....com>,
	Zenghui Yu <yuzenghui@...wei.com>,
	Catalin Marinas <catalin.marinas@....com>,
	Will Deacon <will@...nel.org>,
	Vincent Donnefort <vdonnefort@...gle.com>,
	linux-arm-kernel@...ts.infradead.org,
	kvmarm@...ts.linux.dev,
	linux-kernel@...r.kernel.org
Subject: Re: [PATCH 4/6] KVM: arm64: Move hyp state to hyp_vmemmap

On Thu, 27 Feb 2025 00:33:08 +0000,
Quentin Perret <qperret@...gle.com> wrote:
> 
> Tracking the hypervisor's ownership state in struct hyp_page has
> several benefits, including allowing far more efficient lookups (no
> page-table walk needed) and decorrelating the state from the presence
> of a mapping. This will later allow pages to be mapped into the EL2
> stage-1 less proactively, which is generally a good thing for
> security. And in the future this will help with tracking the state of
> pages mapped into the hypervisor's private range without requiring an
> alias into the 'linear map' range.
> 
> Signed-off-by: Quentin Perret <qperret@...gle.com>
> ---
>  arch/arm64/kvm/hyp/include/nvhe/memory.h | 20 +++++++++-
>  arch/arm64/kvm/hyp/nvhe/mem_protect.c    | 51 ++++++++++++------------
>  arch/arm64/kvm/hyp/nvhe/setup.c          |  6 ++-
>  3 files changed, 49 insertions(+), 28 deletions(-)
> 
> diff --git a/arch/arm64/kvm/hyp/include/nvhe/memory.h b/arch/arm64/kvm/hyp/include/nvhe/memory.h
> index 4a3c55d26ef3..cc4c01158368 100644
> --- a/arch/arm64/kvm/hyp/include/nvhe/memory.h
> +++ b/arch/arm64/kvm/hyp/include/nvhe/memory.h
> @@ -22,6 +22,7 @@ enum pkvm_page_state {
>  	/* Meta-states which aren't encoded directly in the PTE's SW bits */
>  	PKVM_NOPAGE			= BIT(0) | BIT(1),
>  };
> +#define PKVM_PAGE_STATE_MASK		(BIT(0) | BIT(1))
>  
>  #define PKVM_PAGE_STATE_PROT_MASK	(KVM_PGTABLE_PROT_SW0 | KVM_PGTABLE_PROT_SW1)
>  static inline enum kvm_pgtable_prot pkvm_mkstate(enum kvm_pgtable_prot prot,
> @@ -42,7 +43,14 @@ struct hyp_page {
>  	u8 order;
>  
>  	/* Host (non-meta) state. Guarded by the host stage-2 lock. */
> -	unsigned __host_state : 8;
> +	unsigned __host_state : 4;
> +
> +	/*
> +	 * Complement of the hyp (non-meta) state. Guarded by the hyp stage-1 lock. We use the
> +	 * complement so that the initial 0 in __hyp_state_comp (due to the entire vmemmap starting
> +	 * off zeroed) encodes PKVM_NOPAGE.
> +	 */
> +	unsigned __hyp_state_comp : 4;
>  
>  	u32 host_share_guest_count;
>  };
> @@ -89,6 +97,16 @@ static inline void set_host_state(phys_addr_t phys, enum pkvm_page_state state)
>  	hyp_phys_to_page(phys)->__host_state = state;
>  }
>  
> +static inline enum pkvm_page_state get_hyp_state(phys_addr_t phys)
> +{
> +	return hyp_phys_to_page(phys)->__hyp_state_comp ^ PKVM_PAGE_STATE_MASK;
> +}
> +
> +static inline void set_hyp_state(phys_addr_t phys, enum pkvm_page_state state)
> +{
> +	hyp_phys_to_page(phys)->__hyp_state_comp = state ^ PKVM_PAGE_STATE_MASK;
> +}
> +
>  /*
>   * Refcounting for 'struct hyp_page'.
>   * hyp_pool::lock must be held if atomic access to the refcount is required.
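
Just to convince myself I follow the encoding (spelling it out with the
values from the hunk above, nothing new here):

	set_hyp_state(phys, state)  =>  __hyp_state_comp = state ^ PKVM_PAGE_STATE_MASK
	get_hyp_state(phys)         =>  (state ^ PKVM_PAGE_STATE_MASK) ^ PKVM_PAGE_STATE_MASK == state

	/* and for a freshly zeroed vmemmap entry: */
	get_hyp_state(phys)         =>  0 ^ PKVM_PAGE_STATE_MASK == BIT(0) | BIT(1) == PKVM_NOPAGE

i.e. the XOR round-trips, and the all-zero initial state decodes as
PKVM_NOPAGE without any explicit initialisation.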
> diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
> index a45ffdec7612..3ab8c81500c2 100644
> --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
> +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
> @@ -642,24 +642,24 @@ static int __host_set_page_state_range(u64 addr, u64 size,
>  	return 0;
>  }
>  
> -static enum pkvm_page_state hyp_get_page_state(kvm_pte_t pte, u64 addr)
> +static void __hyp_set_page_state_range(phys_addr_t phys, u64 size, enum pkvm_page_state state)
>  {
> -	if (!kvm_pte_valid(pte))
> -		return PKVM_NOPAGE;
> +	phys_addr_t end = phys + size;
>  
> -	return pkvm_getstate(kvm_pgtable_hyp_pte_prot(pte));
> +	for (; phys < end; phys += PAGE_SIZE)
> +		set_hyp_state(phys, state);
>  }
>  
> -static int __hyp_check_page_state_range(u64 addr, u64 size,
> -					enum pkvm_page_state state)
> +static int __hyp_check_page_state_range(phys_addr_t phys, u64 size, enum pkvm_page_state state)
>  {
> -	struct check_walk_data d = {
> -		.desired	= state,
> -		.get_page_state	= hyp_get_page_state,
> -	};
> +	phys_addr_t end = phys + size;
> +
> +	for (; phys < end; phys += PAGE_SIZE) {
> +		if (get_hyp_state(phys) != state)
> +			return -EPERM;
> +	}
>  
> -	hyp_assert_lock_held(&pkvm_pgd_lock);
> -	return check_page_state_range(&pkvm_pgtable, addr, size, &d);
> +	return 0;
>  }
>  
>  static enum pkvm_page_state guest_get_page_state(kvm_pte_t pte, u64 addr)
> @@ -687,7 +687,6 @@ int __pkvm_host_share_hyp(u64 pfn)
>  {
>  	u64 phys = hyp_pfn_to_phys(pfn);
>  	void *virt = __hyp_va(phys);
> -	enum kvm_pgtable_prot prot;
>  	u64 size = PAGE_SIZE;
>  	int ret;
>  
> @@ -698,13 +697,13 @@ int __pkvm_host_share_hyp(u64 pfn)
>  	if (ret)
>  		goto unlock;
>  	if (IS_ENABLED(CONFIG_NVHE_EL2_DEBUG)) {
> -		ret = __hyp_check_page_state_range((u64)virt, size, PKVM_NOPAGE);
> +		ret = __hyp_check_page_state_range(phys, size, PKVM_NOPAGE);

OK, I think it finally clicked for me. Does this mean that all the
tracking is now done in terms of PAs instead of VAs?
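
If so, the lookup itself seems to boil down to the following (lifting
the two relevant lines from the hunks above, the 'old' one from the
removed code):

	/* old: VA in, EL2 stage-1 walk needed to get hold of the PTE */
	state = pkvm_getstate(kvm_pgtable_hyp_pte_prot(pte));

	/* new: PA in, plain vmemmap lookup, no walk and no mapping required */
	state = get_hyp_state(phys);	/* __hyp_state_comp ^ PKVM_PAGE_STATE_MASK */

which would indeed decorrelate the state from whether the page is
mapped at EL2 at all, as the commit message says.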

>  		if (ret)
>  			goto unlock;
>  	}
>  
> -	prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_SHARED_BORROWED);
> -	WARN_ON(pkvm_create_mappings_locked(virt, virt + size, prot));
> +	__hyp_set_page_state_range(phys, size, PKVM_PAGE_SHARED_BORROWED);
> +	WARN_ON(pkvm_create_mappings_locked(virt, virt + size, PAGE_HYP));

And this is the split between the state, which is now tracked on a PA
basis, and the actual mapping, which now only takes the page attributes
and no SW bits?
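
Spelling out my reading of that hunk, purely for confirmation (both
halves copied from the diff above):

	/* before: the state rode along in the SW bits of the EL2 stage-1 PTE */
	prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_SHARED_BORROWED);
	WARN_ON(pkvm_create_mappings_locked(virt, virt + size, prot));

	/* after: the state lives per-PA in the vmemmap, the mapping only
	 * carries the real page attributes */
	__hyp_set_page_state_range(phys, size, PKVM_PAGE_SHARED_BORROWED);
	WARN_ON(pkvm_create_mappings_locked(virt, virt + size, PAGE_HYP));

If I read it right, the SW bits of the hyp stage-1 no longer carry any
state after this.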

Thanks,

	M.

-- 
Without deviation from the norm, progress is not possible.
