linux-kernel - Re: [PATCH v2 19/24] kvm: arm64: Intercept host's PSCI_CPU

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <87h7pg57jv.wl-maz@kernel.org>
Date:   Mon, 23 Nov 2020 17:04:04 +0000
From:   Marc Zyngier <maz@...nel.org>
To:     David Brazdil <dbrazdil@...gle.com>
Cc:     kvmarm@...ts.cs.columbia.edu, linux-arm-kernel@...ts.infradead.org,
        linux-kernel@...r.kernel.org, James Morse <james.morse@....com>,
        Julien Thierry <julien.thierry.kdev@...il.com>,
        Suzuki K Poulose <suzuki.poulose@....com>,
        Catalin Marinas <catalin.marinas@....com>,
        Will Deacon <will@...nel.org>, Dennis Zhou <dennis@...nel.org>,
        Tejun Heo <tj@...nel.org>, Christoph Lameter <cl@...ux.com>,
        Mark Rutland <mark.rutland@....com>,
        Lorenzo Pieralisi <lorenzo.pieralisi@....com>,
        Quentin Perret <qperret@...gle.com>,
        Andrew Scull <ascull@...gle.com>,
        Andrew Walbran <qwandor@...gle.com>, kernel-team@...roid.com
Subject: Re: [PATCH v2 19/24] kvm: arm64: Intercept host's PSCI_CPU_ON SMCs

On Mon, 16 Nov 2020 20:43:13 +0000,
David Brazdil <dbrazdil@...gle.com> wrote:
> 
> Add a handler of the CPU_ON PSCI call from host. When invoked, it looks
> up the logical CPU ID corresponding to the provided MPIDR and populates
> the state struct of the target CPU with the provided x0, pc. It then
> calls CPU_ON itself, with an entry point in hyp that initializes EL2
> state before returning ERET to the provided PC in EL1.
> 
> There is a simple atomic lock around the reset state struct. If it is
> already locked, CPU_ON will return PENDING_ON error code.
> 
> Signed-off-by: David Brazdil <dbrazdil@...gle.com>
> ---
>  arch/arm64/include/asm/kvm_asm.h     |   8 ++-
>  arch/arm64/kvm/arm.c                 |   1 +
>  arch/arm64/kvm/hyp/nvhe/psci-relay.c | 104 +++++++++++++++++++++++++++
>  3 files changed, 110 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
> index 109867fb76f6..2e36ba4be748 100644
> --- a/arch/arm64/include/asm/kvm_asm.h
> +++ b/arch/arm64/include/asm/kvm_asm.h
> @@ -175,9 +175,11 @@ struct kvm_s2_mmu;
>  DECLARE_KVM_NVHE_SYM(__kvm_hyp_init);
>  DECLARE_KVM_NVHE_SYM(__kvm_hyp_host_vector);
>  DECLARE_KVM_HYP_SYM(__kvm_hyp_vector);
> -#define __kvm_hyp_init		CHOOSE_NVHE_SYM(__kvm_hyp_init)
> -#define __kvm_hyp_host_vector	CHOOSE_NVHE_SYM(__kvm_hyp_host_vector)
> -#define __kvm_hyp_vector	CHOOSE_HYP_SYM(__kvm_hyp_vector)
> +DECLARE_KVM_NVHE_SYM(__kvm_hyp_psci_cpu_entry);
> +#define __kvm_hyp_init			CHOOSE_NVHE_SYM(__kvm_hyp_init)
> +#define __kvm_hyp_host_vector		CHOOSE_NVHE_SYM(__kvm_hyp_host_vector)
> +#define __kvm_hyp_vector		CHOOSE_HYP_SYM(__kvm_hyp_vector)
> +#define __kvm_hyp_psci_cpu_entry	CHOOSE_NVHE_SYM(__kvm_hyp_psci_cpu_entry)
>  
>  extern unsigned long kvm_arm_hyp_percpu_base[NR_CPUS];
>  DECLARE_KVM_NVHE_SYM(__per_cpu_start);
> diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
> index 7d2270eeecfb..c76a8e5bd19c 100644
> --- a/arch/arm64/kvm/arm.c
> +++ b/arch/arm64/kvm/arm.c
> @@ -1365,6 +1365,7 @@ static void cpu_init_hyp_mode(void)
>  
>  	params->vector_hyp_va = (unsigned long)kern_hyp_va(kvm_ksym_ref(__kvm_hyp_host_vector));
>  	params->stack_hyp_va = kern_hyp_va(__this_cpu_read(kvm_arm_hyp_stack_page) + PAGE_SIZE);
> +	params->entry_hyp_va = (unsigned long)kern_hyp_va(kvm_ksym_ref(__kvm_hyp_psci_cpu_entry));

It feels really odd to use a per-CPU variable to keep track of
something that is essentially a constant. Why can't we just have an
assembly version of __kimg_hyp_va() and use that to compute the branch
target directly in __kvm_hyp_cpu_entry()? __kvm_hyp_host_vector is
another one.

>  	params->pgd_pa = kvm_mmu_get_httbr();
>  
>  	/*
> diff --git a/arch/arm64/kvm/hyp/nvhe/psci-relay.c b/arch/arm64/kvm/hyp/nvhe/psci-relay.c
> index 7542de8bd679..2daf52b59846 100644
> --- a/arch/arm64/kvm/hyp/nvhe/psci-relay.c
> +++ b/arch/arm64/kvm/hyp/nvhe/psci-relay.c
> @@ -9,10 +9,15 @@
>  #include <asm/kvm_mmu.h>
>  #include <kvm/arm_hypercalls.h>
>  #include <linux/arm-smccc.h>
> +#include <linux/kvm_host.h>
>  #include <linux/psci.h>
>  #include <kvm/arm_psci.h>
>  #include <uapi/linux/psci.h>
>  
> +#define INVALID_CPU_ID UINT_MAX
> +
> +extern char __kvm_hyp_cpu_entry[];
> +
>  /* Config options set by the host. */
>  u32 __ro_after_init kvm_host_psci_version = PSCI_VERSION(0, 0);
>  u32 __ro_after_init kvm_host_psci_function_id[PSCI_FN_MAX];
> @@ -20,6 +25,14 @@ s64 __ro_after_init hyp_physvirt_offset;
>  
>  #define __hyp_pa(x) ((phys_addr_t)((x)) + hyp_physvirt_offset)
>  
> +struct kvm_host_psci_state {
> +	atomic_t pending_on;
> +	unsigned long pc;
> +	unsigned long r0;
> +};
> +
> +static DEFINE_PER_CPU(struct kvm_host_psci_state, kvm_host_psci_state);
> +
>  static u64 get_psci_func_id(struct kvm_cpu_context *host_ctxt)
>  {
>  	return host_ctxt->regs.regs[0];
> @@ -76,10 +89,99 @@ static __noreturn unsigned long psci_forward_noreturn(struct kvm_cpu_context *ho
>  	hyp_panic(); /* unreachable */
>  }
>  
> +static unsigned int find_cpu_id(u64 mpidr)
> +{
> +	int i;

nit: unsigned int?

> +
> +	if (mpidr != INVALID_HWID) {

This is a little ugly on the side [(c) FZ], and deserves a comment
("Reject MPIDRs matching the init value of the __cpu_logical_map[]
array"?).

Also, I personally prefer a construct that reduces the nesting:

	if (mpidr == INVALID_HWID)
		return INVALID_CPU_ID;

> +		for (i = 0; i < NR_CPUS; i++) {
> +			if (cpu_logical_map(i) == mpidr)
> +				return i;
> +		}
> +	}
> +
> +	return INVALID_CPU_ID;
> +}
> +
> +static bool try_acquire_reset_state(struct kvm_host_psci_state *cpu_state,
> +				    unsigned long pc, unsigned long r0)
> +{
> +	if (atomic_cmpxchg_acquire(&cpu_state->pending_on, 0, 1) != 0)

What guarantees that this cmpxchg is inlined here? Also, having some
names for 0 and 1 would be nice.

> +		return false;
> +
> +	cpu_state->pc = pc;
> +	cpu_state->r0 = r0;
> +	wmb();
> +
> +	return true;
> +}
> +
> +static void release_reset_state(struct kvm_host_psci_state *cpu_state)
> +{
> +	atomic_set_release(&cpu_state->pending_on, 0);
> +}
> +
> +static int psci_cpu_on(u64 func_id, struct kvm_cpu_context *host_ctxt)
> +{
> +	u64 mpidr = host_ctxt->regs.regs[1];
> +	unsigned long pc = host_ctxt->regs.regs[2];
> +	unsigned long r0 = host_ctxt->regs.regs[3];
> +	unsigned int cpu_id;
> +	struct kvm_host_psci_state *cpu_state;
> +	struct kvm_nvhe_init_params *cpu_params;
> +	int ret;
> +
> +	/*
> +	 * Find the logical CPU ID for the given MPIDR. The search set is
> +	 * the set of CPUs that were online at the point of KVM initialization.
> +	 * Booting other CPUs is rejected because their cpufeatures were not
> +	 * checked against the finalized capabilities. This could be relaxed
> +	 * by doing the feature checks in hyp.
> +	 */
> +	cpu_id = find_cpu_id(mpidr);
> +	if (cpu_id == INVALID_CPU_ID)
> +		return PSCI_RET_INVALID_PARAMS;
> +
> +	cpu_state = per_cpu_ptr(&kvm_host_psci_state, cpu_id);
> +	cpu_params = per_cpu_ptr(&kvm_init_params, cpu_id);
> +
> +	if (!try_acquire_reset_state(cpu_state, pc, r0))
> +		return PSCI_RET_ALREADY_ON;
> +
> +	ret = psci_call(func_id, mpidr,
> +			__hyp_pa(hyp_symbol_addr(__kvm_hyp_cpu_entry)),
> +			__hyp_pa(cpu_params));
> +
> +	/*
> +	 * If CPU_ON was successful, the reset state will be released in
> +	 * kvm_host_psci_cpu_entry().
> +	 */
> +	if (ret != PSCI_RET_SUCCESS)
> +		release_reset_state(cpu_state);
> +	return ret;
> +}
> +
> +void __noreturn __host_enter(struct kvm_cpu_context *host_ctxt);
> +
> +asmlinkage void __noreturn __kvm_hyp_psci_cpu_entry(void)
> +{
> +	struct kvm_host_psci_state *cpu_state = this_cpu_ptr(&kvm_host_psci_state);
> +	struct kvm_cpu_context *host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
> +
> +	host_ctxt->regs.regs[0] = cpu_state->r0;
> +	write_sysreg_el2(cpu_state->pc, SYS_ELR);
> +
> +	release_reset_state(cpu_state);
> +
> +	__host_enter(host_ctxt);
> +}
> +
>  static unsigned long psci_0_1_handler(u64 func_id, struct kvm_cpu_context *host_ctxt)
>  {
>  	if (func_id == kvm_host_psci_function_id[PSCI_FN_CPU_OFF])
>  		return psci_forward(host_ctxt);
> +	else if (func_id == kvm_host_psci_function_id[PSCI_FN_CPU_ON])
> +		return psci_cpu_on(func_id, host_ctxt);
>  	else if (func_id == kvm_host_psci_function_id[PSCI_FN_MIGRATE])
>  		return psci_forward(host_ctxt);
>  	else
> @@ -100,6 +202,8 @@ static unsigned long psci_0_2_handler(u64 func_id, struct kvm_cpu_context *host_
>  	case PSCI_0_2_FN_SYSTEM_RESET:
>  		psci_forward_noreturn(host_ctxt);
>  		unreachable();
> +	case PSCI_0_2_FN64_CPU_ON:
> +		return psci_cpu_on(func_id, host_ctxt);
>  	default:
>  		return PSCI_RET_NOT_SUPPORTED;
>  	}
> -- 
> 2.29.2.299.gdc1121823c-goog
> 
> 

Thanks,

	M.

-- 
Without deviation from the norm, progress is not possible.