Message-ID: <CA+_y_2FMrc=XqAPK-WVtb5No9xYXOXmaLbVE+AEGZL668YhKGQ@mail.gmail.com>
Date: Fri, 10 Dec 2021 15:18:50 +0000
From: Andrew Walbran <qwandor@...gle.com>
To: Quentin Perret <qperret@...gle.com>
Cc: Marc Zyngier <maz@...nel.org>, James Morse <james.morse@....com>,
	Alexandru Elisei <alexandru.elisei@....com>,
	Suzuki K Poulose <suzuki.poulose@....com>,
	Catalin Marinas <catalin.marinas@....com>,
	Will Deacon <will@...nel.org>,
	linux-arm-kernel@...ts.infradead.org, kvmarm@...ts.cs.columbia.edu,
	linux-kernel@...r.kernel.org, kernel-team@...roid.com
Subject: Re: [PATCH v3 11/15] KVM: arm64: Implement do_share() helper for
	sharing memory

Reviewed-by: Andrew Walbran <qwandor@...gle.com>

On Wed, 1 Dec 2021 at 17:04, 'Quentin Perret' via kernel-team
<kernel-team@...roid.com> wrote:
>
> From: Will Deacon <will@...nel.org>
>
> By default, protected KVM isolates memory pages so that they are
> accessible only to their owner: be it the host kernel, the hypervisor
> at EL2 or (in future) the guest. Establishing shared-memory regions
> between these components therefore involves a transition for each page
> so that the owner can share memory with a borrower under a certain set
> of permissions.
>
> Introduce a do_share() helper for safely sharing a memory region between
> two components. Currently, only host-to-hyp sharing is implemented, but
> the code is easily extended to handle other combinations and the
> permission checks for each component are reusable.
>
> Signed-off-by: Will Deacon <will@...nel.org>
> Signed-off-by: Quentin Perret <qperret@...gle.com>
> ---
> arch/arm64/kvm/hyp/nvhe/mem_protect.c | 237 ++++++++++++++++++++++++++
> 1 file changed, 237 insertions(+)
>
> diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
> index 757dfefe3aeb..74ca4043b08a 100644
> --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
> +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
> @@ -471,3 +471,240 @@ void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt)
>  	ret = host_stage2_idmap(addr);
>  	BUG_ON(ret && ret != -EAGAIN);
>  }
> +
> +/* This corresponds to locking order */
> +enum pkvm_component_id {
> +	PKVM_ID_HOST,
> +	PKVM_ID_HYP,
> +};
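Nit: could this comment say which locks are meant? Something like:

	/*
	 * The components, in the order in which their locks must be
	 * taken: host_kvm.lock first, then pkvm_pgd_lock.
	 */

(assuming that is the intended order; it matches how the callers later
in the series take the locks, if I'm reading them right).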
> +
> +struct pkvm_mem_transition {
> +	u64				nr_pages;
> +
> +	struct {
> +		enum pkvm_component_id	id;
> +		/* Address in the initiator's address space */
> +		u64			addr;
> +
> +		union {
> +			struct {
> +				/* Address in the completer's address space */
> +				u64	completer_addr;
> +			} host;
> +		};
> +	} initiator;
> +
> +	struct {
> +		enum pkvm_component_id	id;
> +	} completer;
> +};
> +
> +struct pkvm_mem_share {
> +	const struct pkvm_mem_transition	tx;
> +	const enum kvm_pgtable_prot		prot;
It would be helpful to add a comment documenting what this is used for
(i.e. whether it is for the initiator or completer). Or even rename it
to something like completer_prot to make that clear.
> +};
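To check my understanding, a host-to-hyp share of a single page would
presumably be described by a later patch in the series with something
like the following (host_addr and hyp_addr here are placeholders for
the page's address in the host's and the hypervisor's address spaces):

	struct pkvm_mem_share share = {
		.tx	= {
			.nr_pages	= 1,
			.initiator	= {
				.id	= PKVM_ID_HOST,
				.addr	= host_addr,
				.host	= {
					.completer_addr	= hyp_addr,
				},
			},
			.completer	= {
				.id	= PKVM_ID_HYP,
			},
		},
		.prot	= PAGE_HYP,
	};

	ret = do_share(&share);

If that's right, then the comment/rename suggested above would also
make the prot field self-explanatory at the call site.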
> +
> +struct check_walk_data {
> +	enum pkvm_page_state	desired;
> +	enum pkvm_page_state	(*get_page_state)(kvm_pte_t pte);
> +};
> +
> +static int __check_page_state_visitor(u64 addr, u64 end, u32 level,
> +				      kvm_pte_t *ptep,
> +				      enum kvm_pgtable_walk_flags flag,
> +				      void * const arg)
> +{
> +	struct check_walk_data *d = arg;
> +	kvm_pte_t pte = *ptep;
> +
> +	if (kvm_pte_valid(pte) && !addr_is_memory(kvm_pte_to_phys(pte)))
> +		return -EINVAL;
> +
> +	return d->get_page_state(pte) == d->desired ? 0 : -EPERM;
> +}
> +
> +static int check_page_state_range(struct kvm_pgtable *pgt, u64 addr, u64 size,
> +				  struct check_walk_data *data)
> +{
> +	struct kvm_pgtable_walker walker = {
> +		.cb	= __check_page_state_visitor,
> +		.arg	= data,
> +		.flags	= KVM_PGTABLE_WALK_LEAF,
> +	};
> +
> +	return kvm_pgtable_walk(pgt, addr, size, &walker);
> +}
> +
> +static enum pkvm_page_state host_get_page_state(kvm_pte_t pte)
> +{
> +	if (!kvm_pte_valid(pte) && pte)
> +		return PKVM_NOPAGE;
> +
> +	return pkvm_getstate(kvm_pgtable_stage2_pte_prot(pte));
> +}
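The !kvm_pte_valid(pte) && pte condition is subtle enough to deserve a
comment. If I understand the scheme correctly, an empty PTE just means
the host hasn't mapped the page yet (and unmapped memory is owned by
the host by default), whereas an invalid but non-zero PTE is an
annotation that ownership was given away. Maybe:

	/*
	 * The host has no pgtable entry for this page, but since
	 * unmapped memory is owned by the host by default, an invalid
	 * non-zero PTE must be an annotation that the page belongs to
	 * someone else.
	 */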
> +
> +static int __host_check_page_state_range(u64 addr, u64 size,
> +					 enum pkvm_page_state state)
> +{
> +	struct check_walk_data d = {
> +		.desired	= state,
> +		.get_page_state	= host_get_page_state,
> +	};
> +
> +	hyp_assert_lock_held(&host_kvm.lock);
> +	return check_page_state_range(&host_kvm.pgt, addr, size, &d);
> +}
> +
> +static int __host_set_page_state_range(u64 addr, u64 size,
> +				       enum pkvm_page_state state)
> +{
> +	enum kvm_pgtable_prot prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, state);
> +
> +	return host_stage2_idmap_locked(addr, size, prot);
> +}
> +
> +static int host_request_owned_transition(u64 *completer_addr,
> +					 const struct pkvm_mem_transition *tx)
> +{
> +	u64 size = tx->nr_pages * PAGE_SIZE;
> +	u64 addr = tx->initiator.addr;
> +
> +	*completer_addr = tx->initiator.host.completer_addr;
> +	return __host_check_page_state_range(addr, size, PKVM_PAGE_OWNED);
> +}
> +
> +static int host_initiate_share(u64 *completer_addr,
> +			       const struct pkvm_mem_transition *tx)
> +{
> +	u64 size = tx->nr_pages * PAGE_SIZE;
> +	u64 addr = tx->initiator.addr;
> +
> +	*completer_addr = tx->initiator.host.completer_addr;
> +	return __host_set_page_state_range(addr, size, PKVM_PAGE_SHARED_OWNED);
> +}
> +
> +static enum pkvm_page_state hyp_get_page_state(kvm_pte_t pte)
> +{
> +	if (!kvm_pte_valid(pte))
> +		return PKVM_NOPAGE;
> +
> +	return pkvm_getstate(kvm_pgtable_stage2_pte_prot(pte));
> +}
> +
> +static int __hyp_check_page_state_range(u64 addr, u64 size,
> +					enum pkvm_page_state state)
> +{
> +	struct check_walk_data d = {
> +		.desired	= state,
> +		.get_page_state	= hyp_get_page_state,
> +	};
> +
> +	hyp_assert_lock_held(&pkvm_pgd_lock);
> +	return check_page_state_range(&pkvm_pgtable, addr, size, &d);
> +}
> +
> +static bool __hyp_ack_skip_pgtable_check(const struct pkvm_mem_transition *tx)
> +{
> +	return !(IS_ENABLED(CONFIG_NVHE_EL2_DEBUG) ||
> +		 tx->initiator.id != PKVM_ID_HOST);
> +}
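The negated disjunction here took me a couple of reads. Would De
Morgan's form be clearer?

	return !IS_ENABLED(CONFIG_NVHE_EL2_DEBUG) &&
	       tx->initiator.id == PKVM_ID_HOST;

That is, if I've understood the intent: for host-initiated shares the
hypervisor can trust its own state tracking and skip walking its page
table, except in debug builds where we want the extra checking.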
> +
> +static int hyp_ack_share(u64 addr, const struct pkvm_mem_transition *tx,
> +			 enum kvm_pgtable_prot perms)
> +{
> +	u64 size = tx->nr_pages * PAGE_SIZE;
> +
> +	if (perms != PAGE_HYP)
> +		return -EPERM;
> +
> +	if (__hyp_ack_skip_pgtable_check(tx))
> +		return 0;
> +
> +	return __hyp_check_page_state_range(addr, size, PKVM_NOPAGE);
> +}
> +
> +static int hyp_complete_share(u64 addr, const struct pkvm_mem_transition *tx,
> +			      enum kvm_pgtable_prot perms)
> +{
> +	void *start = (void *)addr, *end = start + (tx->nr_pages * PAGE_SIZE);
> +	enum kvm_pgtable_prot prot;
> +
> +	prot = pkvm_mkstate(perms, PKVM_PAGE_SHARED_BORROWED);
> +	return pkvm_create_mappings_locked(start, end, prot);
> +}
> +
> +static int check_share(struct pkvm_mem_share *share)
> +{
> +	const struct pkvm_mem_transition *tx = &share->tx;
> +	u64 completer_addr;
> +	int ret;
> +
> +	switch (tx->initiator.id) {
> +	case PKVM_ID_HOST:
> +		ret = host_request_owned_transition(&completer_addr, tx);
> +		break;
> +	default:
> +		ret = -EINVAL;
> +	}
> +
> +	if (ret)
> +		return ret;
> +
> +	switch (tx->completer.id) {
> +	case PKVM_ID_HYP:
> +		ret = hyp_ack_share(completer_addr, tx, share->prot);
> +		break;
> +	default:
> +		ret = -EINVAL;
> +	}
> +
> +	return ret;
> +}
> +
> +static int __do_share(struct pkvm_mem_share *share)
> +{
> +	const struct pkvm_mem_transition *tx = &share->tx;
> +	u64 completer_addr;
> +	int ret;
> +
> +	switch (tx->initiator.id) {
> +	case PKVM_ID_HOST:
> +		ret = host_initiate_share(&completer_addr, tx);
> +		break;
> +	default:
> +		ret = -EINVAL;
> +	}
> +
> +	if (ret)
> +		return ret;
> +
> +	switch (tx->completer.id) {
> +	case PKVM_ID_HYP:
> +		ret = hyp_complete_share(completer_addr, tx, share->prot);
> +		break;
> +	default:
> +		ret = -EINVAL;
> +	}
> +
> +	return ret;
> +}
> +
> +/*
> + * do_share():
> + *
> + * The page owner grants access to another component with a given set
> + * of permissions.
> + *
> + * Initiator: OWNED	=> SHARED_OWNED
> + * Completer: NOPAGE	=> SHARED_BORROWED
> + */
> +static int do_share(struct pkvm_mem_share *share)
> +{
> +	int ret;
> +
> +	ret = check_share(share);
> +	if (ret)
> +		return ret;
> +
> +	return WARN_ON(__do_share(share));
> +}
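The check_share()/__do_share() split is nice: everything is validated
under the locks before any page table is modified, so the commit phase
should never fail. One small observation: WARN_ON() returns 0 or 1
rather than propagating the error code, so on the (supposedly
impossible) failure path do_share() returns 1 instead of a -errno.
Probably harmless since it's unreachable, but something like:

	ret = __do_share(share);
	WARN_ON(ret);
	return ret;

would preserve the error code for callers.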
> --
> 2.34.0.rc2.393.gf8c9666880-goog