lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1503649901-5834-5-git-send-email-florent.revest@arm.com>
Date:   Fri, 25 Aug 2017 09:31:34 +0100
From:   Florent Revest <florent.revest@....com>
To:     linux-arm-kernel@...ts.infradead.org
Cc:     matt@...eblueprint.co.uk, ard.biesheuvel@...aro.org,
        pbonzini@...hat.com, rkrcmar@...hat.com,
        christoffer.dall@...aro.org, catalin.marinas@....com,
        will.deacon@....com, mark.rutland@....com, marc.zyngier@....com,
        linux-efi@...r.kernel.org, linux-kernel@...r.kernel.org,
        kvm@...r.kernel.org, kvmarm@...ts.cs.columbia.edu,
        leif.lindholm@....com, revestflo@...il.com,
        Florent Revest <florent.revest@....com>
Subject: [RFC 04/11] KVM, arm, arm64: Offer PAs to IPAs idmapping to internal VMs

Usual KVM virtual machines map the guest's physical addresses from the
userspace memory of a process. However, with the new concept of internal VMs,
a virtual machine can be created from the kernel, without any link to a
userspace context. Hence, some of KVM's architecture-specific code needs to
be modified to take this kind of VM into account.

The approach chosen with this patch is to let internal VMs idmap physical
addresses into intermediate physical addresses by calling
kvm_set_memory_region with a kvm_userspace_memory_region where the
guest_phys_addr field points both to the original PAs and to the IPAs. The
userspace_addr field of this struct is therefore ignored with internal VMs.

This patch extends the capabilities of the arm and arm64 stage2 MMU code
to handle internal VMs. Three things are changed:

- Various parts of the MMU code which are related to a userspace context
are now only executed if kvm->mm is present.

- When this pointer is NULL, struct kvm_userspace_memory_regions are
treated by internal_vm_prep_mem as idmaps of physical memory.

- A set of 256 additional private memslots is now reserved on arm64 for
idmapping the memory of internal VMs.

Note: this patch should have pretty much no performance impact on the
critical path of traditional VMs since only one unlikely branch had to be
added to the page fault handler.

Signed-off-by: Florent Revest <florent.revest@....com>
---
 arch/arm64/include/asm/kvm_host.h |  1 +
 virt/kvm/arm/mmu.c                | 76 +++++++++++++++++++++++++++++++++++++--
 2 files changed, 74 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index d686300..65aab35 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -32,6 +32,7 @@
 #define __KVM_HAVE_ARCH_INTC_INITIALIZED

 #define KVM_USER_MEM_SLOTS 512
+#define KVM_PRIVATE_MEM_SLOTS 256
 #define KVM_HALT_POLL_NS_DEFAULT 500000

 #include <kvm/arm_vgic.h>
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index 2ea21da..1d2d3df 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -772,6 +772,11 @@ static void stage2_unmap_memslot(struct kvm *kvm,
        phys_addr_t size = PAGE_SIZE * memslot->npages;
        hva_t reg_end = hva + size;

+       if (unlikely(!kvm->mm)) {
+               unmap_stage2_range(kvm, addr, size);
+               return;
+       }
+
        /*
         * A memory region could potentially cover multiple VMAs, and any holes
         * between them, so iterate over all of them to find out if we should
@@ -819,7 +824,8 @@ void stage2_unmap_vm(struct kvm *kvm)
        int idx;

        idx = srcu_read_lock(&kvm->srcu);
-       down_read(&current->mm->mmap_sem);
+       if (likely(kvm->mm))
+               down_read(&current->mm->mmap_sem);
        spin_lock(&kvm->mmu_lock);

        slots = kvm_memslots(kvm);
@@ -827,7 +833,8 @@ void stage2_unmap_vm(struct kvm *kvm)
                stage2_unmap_memslot(kvm, memslot);

        spin_unlock(&kvm->mmu_lock);
-       up_read(&current->mm->mmap_sem);
+       if (likely(kvm->mm))
+               up_read(&current->mm->mmap_sem);
        srcu_read_unlock(&kvm->srcu, idx);
 }

@@ -1303,6 +1310,12 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
                return -EFAULT;
        }

+       if (unlikely(!kvm->mm)) {
+               kvm_err("Unexpected internal VM page fault\n");
+               kvm_inject_vabt(vcpu);
+               return 0;
+       }
+
        /* Let's check if we will get back a huge page backed by hugetlbfs */
        down_read(&current->mm->mmap_sem);
        vma = find_vma_intersection(current->mm, hva, hva + 1);
@@ -1850,6 +1863,54 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
                kvm_mmu_wp_memory_region(kvm, mem->slot);
 }

+/*
+ * internal_vm_prep_mem - idmaps a range of hpa to gpa at stage2
+ *
+ * While userspace VMs manage gpas using hvas, internal virtual machines need a
+ * way to map physical addresses to a guest. In order to avoid code duplication,
+ * the kvm_set_memory_region call is kept for internal VMs, however it usually
+ * expects a struct kvm_userspace_memory_region with a userspace_addr field.
+ * With internal VMs, this field is ignored and the physical memory pointed to
+ * by guest_phys_addr can only be idmapped. Returns 0 or the first error hit.
+ */
+static int internal_vm_prep_mem(struct kvm *kvm,
+                               const struct kvm_userspace_memory_region *mem)
+{
+       phys_addr_t addr, end;
+       unsigned long pfn;
+       int ret = 0;
+       struct kvm_mmu_memory_cache cache = { 0 };
+
+       end = mem->guest_phys_addr + mem->memory_size;
+       pfn = __phys_to_pfn(mem->guest_phys_addr);
+       addr = mem->guest_phys_addr;
+
+       for (; addr < end; addr += PAGE_SIZE) {
+               pte_t pte = pfn_pte(pfn, PAGE_S2);
+
+               pte = kvm_s2pte_mkwrite(pte);
+
+               /* Refill the cache that stage2_set_pte() is handed below */
+               ret = mmu_topup_memory_cache(&cache,
+                                            KVM_MMU_CACHE_MIN_PAGES,
+                                            KVM_NR_MEM_OBJS);
+               if (ret)
+                       break;
+
+               spin_lock(&kvm->mmu_lock);
+               ret = stage2_set_pte(kvm, &cache, addr, &pte, 0);
+               spin_unlock(&kvm->mmu_lock);
+               if (ret)
+                       break;
+
+               pfn++;
+       }
+
+       /* Drop what is left in the stack-local cache, on success and on error */
+       mmu_free_memory_cache(&cache);
+       return ret;
+}
+
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
                                   struct kvm_memory_slot *memslot,
                                   const struct kvm_userspace_memory_region *mem,
@@ -1872,6 +1933,13 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
            (KVM_PHYS_SIZE >> PAGE_SHIFT))
                return -EFAULT;

+       if (unlikely(!kvm->mm)) {
+               ret = internal_vm_prep_mem(kvm, mem);
+               if (ret)
+                       goto out;
+               goto out_internal_vm;
+       }
+
        down_read(&current->mm->mmap_sem);
        /*
         * A memory region could potentially cover multiple VMAs, and any holes
@@ -1930,6 +1998,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
                hva = vm_end;
        } while (hva < reg_end);

+out_internal_vm:
        if (change == KVM_MR_FLAGS_ONLY)
                goto out;

@@ -1940,7 +2009,8 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
                stage2_flush_memslot(kvm, memslot);
        spin_unlock(&kvm->mmu_lock);
 out:
-       up_read(&current->mm->mmap_sem);
+       if (kvm->mm)
+               up_read(&current->mm->mmap_sem);
        return ret;
 }

--
1.9.1

IMPORTANT NOTICE: The contents of this email and any attachments are confidential and may also be privileged. If you are not the intended recipient, please notify the sender immediately and do not disclose the contents to any other person, use it for any purpose, or store or copy the information in any medium. Thank you.

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ