Message-ID: <87wnfmaexc.wl-maz@kernel.org>
Date:   Mon, 18 Apr 2022 11:01:19 +0100
From:   Marc Zyngier <maz@...nel.org>
To:     Kalesh Singh <kaleshsingh@...gle.com>
Cc:     will@...nel.org, qperret@...gle.com, tabba@...gle.com,
        surenb@...gle.com, kernel-team@...roid.com,
        James Morse <james.morse@....com>,
        Alexandru Elisei <alexandru.elisei@....com>,
        Suzuki K Poulose <suzuki.poulose@....com>,
        Catalin Marinas <catalin.marinas@....com>,
        Mark Rutland <mark.rutland@....com>,
        Andrew Jones <drjones@...hat.com>,
        Nick Desaulniers <ndesaulniers@...gle.com>,
        Masahiro Yamada <masahiroy@...nel.org>,
        Changbin Du <changbin.du@...el.com>,
        linux-arm-kernel@...ts.infradead.org, kvmarm@...ts.cs.columbia.edu,
        linux-kernel@...r.kernel.org
Subject: Re: [PATCH v7 3/6] KVM: arm64: Add guard pages for KVM nVHE hypervisor stack

On Fri, 08 Apr 2022 21:03:26 +0100,
Kalesh Singh <kaleshsingh@...gle.com> wrote:
> 
> Map the stack pages in the flexible private VA range and allocate
> guard pages below the stack as unbacked VA space. The stack is aligned
> so that any valid stack address has bit PAGE_SHIFT set to 1; this is
> used for overflow detection (implemented in a subsequent patch in the
> series).
> 
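For illustration, the invariant this describes could be written as the C
sketch below. The helper name is hypothetical and not part of the series,
which implements the real check in a later patch; only the layout (guard
page low, stack page high, two-page allocation aligned to its size) comes
from this patch:

static inline bool hyp_stack_addr_valid(unsigned long addr)
{
	/*
	 * The two-page allocation is aligned to its size, so the guard
	 * page has bit PAGE_SHIFT clear while the stack page above it
	 * has bit PAGE_SHIFT set (PAGE_SIZE == 1UL << PAGE_SHIFT).
	 */
	return addr & PAGE_SIZE;
}
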
> Signed-off-by: Kalesh Singh <kaleshsingh@...gle.com>
> Tested-by: Fuad Tabba <tabba@...gle.com>
> Reviewed-by: Fuad Tabba <tabba@...gle.com>
> ---
> 
> Changes in v7:
>   - Add Fuad's Reviewed-by and Tested-by tags.
> 
> Changes in v6:
>   - Update call to hyp_alloc_private_va_range() (return val and params)
> 
> Changes in v5:
>   - Use a single allocation for stack and guard pages to ensure they
>     are contiguous, per Marc
> 
> Changes in v4:
>   - Replace the IS_ERR_OR_NULL check with an IS_ERR check now that
>     hyp_alloc_private_va_range() returns an error rather than a NULL
>     pointer, per Fuad
>   - Format comments to < 80 cols, per Fuad
> 
> Changes in v3:
>   - Handle null ptr in IS_ERR_OR_NULL checks, per Mark
> 
> 
>  arch/arm64/include/asm/kvm_asm.h |  1 +
>  arch/arm64/include/asm/kvm_mmu.h |  3 +++
>  arch/arm64/kvm/arm.c             | 39 +++++++++++++++++++++++++++++---
>  arch/arm64/kvm/mmu.c             |  4 ++--
>  4 files changed, 42 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
> index d5b0386ef765..2e277f2ed671 100644
> --- a/arch/arm64/include/asm/kvm_asm.h
> +++ b/arch/arm64/include/asm/kvm_asm.h
> @@ -169,6 +169,7 @@ struct kvm_nvhe_init_params {
>  	unsigned long tcr_el2;
>  	unsigned long tpidr_el2;
>  	unsigned long stack_hyp_va;
> +	unsigned long stack_pa;
>  	phys_addr_t pgd_pa;
>  	unsigned long hcr_el2;
>  	unsigned long vttbr;
> diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
> index a50cbb5ba402..b805316c4866 100644
> --- a/arch/arm64/include/asm/kvm_mmu.h
> +++ b/arch/arm64/include/asm/kvm_mmu.h
> @@ -117,6 +117,9 @@ alternative_cb_end
>  #include <asm/mmu_context.h>
>  #include <asm/kvm_host.h>
>  
> +extern struct kvm_pgtable *hyp_pgtable;
> +extern struct mutex kvm_hyp_pgd_mutex;

I'd rather you didn't expose these at all.

> +
>  void kvm_update_va_mask(struct alt_instr *alt,
>  			__le32 *origptr, __le32 *updptr, int nr_inst);
>  void kvm_compute_layout(void);
> diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
> index 523bc934fe2f..5687c0175151 100644
> --- a/arch/arm64/kvm/arm.c
> +++ b/arch/arm64/kvm/arm.c
> @@ -1483,7 +1483,6 @@ static void cpu_prepare_hyp_mode(int cpu)
>  	tcr |= (idmap_t0sz & GENMASK(TCR_TxSZ_WIDTH - 1, 0)) << TCR_T0SZ_OFFSET;
>  	params->tcr_el2 = tcr;
>  
> -	params->stack_hyp_va = kern_hyp_va(per_cpu(kvm_arm_hyp_stack_page, cpu) + PAGE_SIZE);
>  	params->pgd_pa = kvm_mmu_get_httbr();
>  	if (is_protected_kvm_enabled())
>  		params->hcr_el2 = HCR_HOST_NVHE_PROTECTED_FLAGS;
> @@ -1933,14 +1932,48 @@ static int init_hyp_mode(void)
>  	 * Map the Hyp stack pages
>  	 */
>  	for_each_possible_cpu(cpu) {
> +		struct kvm_nvhe_init_params *params = per_cpu_ptr_nvhe_sym(kvm_init_params, cpu);
>  		char *stack_page = (char *)per_cpu(kvm_arm_hyp_stack_page, cpu);
> -		err = create_hyp_mappings(stack_page, stack_page + PAGE_SIZE,
> -					  PAGE_HYP);
> +		unsigned long hyp_addr;
>  
> +		/*
> +		 * Allocate a contiguous HYP private VA range for the stack
> +		 * and guard page. The allocation is also aligned based on
> +		 * the order of its size.
> +		 */
> +		err = hyp_alloc_private_va_range(PAGE_SIZE * 2, &hyp_addr);
> +		if (err) {
> +			kvm_err("Cannot allocate hyp stack guard page\n");
> +			goto out_err;
> +		}
> +
> +		/*
> +		 * Since the stack grows downwards, map the stack to the page
> +		 * at the higher address and leave the lower guard page
> +		 * unbacked.
> +		 *
> +		 * Any valid stack address now has the PAGE_SHIFT bit as 1
> +		 * and addresses corresponding to the guard page have the
> +		 * PAGE_SHIFT bit as 0 - this is used for overflow detection.
> +		 */
> +		mutex_lock(&kvm_hyp_pgd_mutex);
> +		err = kvm_pgtable_hyp_map(hyp_pgtable, hyp_addr + PAGE_SIZE,
> +					PAGE_SIZE, __pa(stack_page), PAGE_HYP);
> +		mutex_unlock(&kvm_hyp_pgd_mutex);

The mutex (and the HYP page table structure) really should stay
private to the MMU code. Just add a new helper that will take the lock
and use hyp_pgtable.
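
A minimal sketch of such a helper, assuming it lives in mmu.c next to the
static definitions (the name is hypothetical; kvm_pgtable_hyp_map(),
hyp_pgtable and kvm_hyp_pgd_mutex are the symbols from the diff above):

int hyp_map_private_range(unsigned long va, phys_addr_t pa, size_t size,
			  enum kvm_pgtable_prot prot)
{
	int err;

	/* Serialize hyp page table updates without exporting the lock. */
	mutex_lock(&kvm_hyp_pgd_mutex);
	err = kvm_pgtable_hyp_map(hyp_pgtable, va, size, pa, prot);
	mutex_unlock(&kvm_hyp_pgd_mutex);

	return err;
}

init_hyp_mode() could then map the stack with
hyp_map_private_range(hyp_addr + PAGE_SIZE, __pa(stack_page), PAGE_SIZE,
PAGE_HYP), and neither symbol would have to leave mmu.c.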

>  		if (err) {
>  			kvm_err("Cannot map hyp stack\n");
>  			goto out_err;
>  		}
> +
> +		/*
> +		 * Save the stack PA in nvhe_init_params. This will be needed
> +		 * to recreate the stack mapping in protected nVHE mode.
> +		 * __hyp_pa() won't do the right thing there, since the stack
> +		 * has been mapped in the flexible private VA space.
> +		 */
> +		params->stack_pa = __pa(stack_page);
> +
> +		params->stack_hyp_va = hyp_addr + (2 * PAGE_SIZE);
>  	}
>  
>  	for_each_possible_cpu(cpu) {
> diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
> index 3d3efea4e991..a54f00bd06cc 100644
> --- a/arch/arm64/kvm/mmu.c
> +++ b/arch/arm64/kvm/mmu.c
> @@ -22,8 +22,8 @@
>  
>  #include "trace.h"
>  
> -static struct kvm_pgtable *hyp_pgtable;
> -static DEFINE_MUTEX(kvm_hyp_pgd_mutex);
> +struct kvm_pgtable *hyp_pgtable;
> +DEFINE_MUTEX(kvm_hyp_pgd_mutex);
>  
>  static unsigned long hyp_idmap_start;
>  static unsigned long hyp_idmap_end;
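
For context on the stack_pa comment above: in protected nVHE mode the
hypervisor recreates this mapping itself in a companion patch of the
series. Very roughly, and purely as an illustrative reconstruction using
helper names from the pkvm side of the series rather than the quoted diff:

unsigned long hyp_addr;
int ret;

/* Reserve a guarded two-page private VA range at EL2, as done above. */
ret = pkvm_alloc_private_va_range(PAGE_SIZE * 2, &hyp_addr);
if (!ret) {
	/* Leave the low guard page unbacked; map the stack from stack_pa. */
	ret = __pkvm_create_mappings(hyp_addr + PAGE_SIZE, PAGE_SIZE,
				     params->stack_pa, PAGE_HYP);
}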

Thanks,

	M.

-- 
Without deviation from the norm, progress is not possible.
