linux-kernel - [Patch v2 1/4] KVM:MMU: Introduce the pin

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20200819151742.7892-2-Eric.VanTassell@amd.com>
Date:   Wed, 19 Aug 2020 10:17:39 -0500
From:   eric van tassell <Eric.VanTassell@....com>
To:     kvm@...r.kernel.org
Cc:     linux-kernel@...r.kernel.org, bp@...en8.de, hpa@...or.com,
        mingo@...hat.com, jmattson@...gle.com, joro@...tes.org,
        pbonzini@...hat.com, sean.j.christopherson@...el.com,
        tglx@...utronix.de, vkuznets@...hat.com, wanpengli@...cent.com,
        x86@...nel.org, rientjes@...gle.com, junaids@...gle.com,
        evantass@....com
Subject: [Patch v2 1/4] KVM:MMU: Introduce the pin_page() callback

This generic callback will be called just before setting the spte.

Check the return code where not previously checked since this operation can
return an error.

Defer pinning of SEV guest pages until they're used in order to reduce SEV
guest startup time by eliminating the compute cycles required to pin all
the pages up front. Additionally, we want to reduce memory pressure due to
guests that reserve but only sparsely access a large amount of memory.

Co-developed-by: Brijesh Singh <brijesh.singh@....com>
Signed-off-by: eric van tassell <Eric.VanTassell@....com>
---
 arch/x86/include/asm/kvm_host.h |  3 +++
 arch/x86/kvm/mmu/mmu.c          | 30 ++++++++++++++++++++++++++----
 arch/x86/kvm/mmu/paging_tmpl.h  | 27 ++++++++++++++++-----------
 3 files changed, 45 insertions(+), 15 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 5aaef036627f..767653fa3245 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1227,6 +1227,9 @@ struct kvm_x86_ops {
 	int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu);
 
 	void (*migrate_timers)(struct kvm_vcpu *vcpu);
+
+	int (*pin_page)(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t pfn,
+			int level, u64 *spte);
 };
 
 struct kvm_x86_nested_ops {
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index fa506aaaf019..2b60fdb79b86 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3004,6 +3004,22 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 		spte |= kvm_x86_ops.get_mt_mask(vcpu, gfn,
 			kvm_is_mmio_pfn(pfn));
 
+	if (kvm_x86_ops.pin_page && !kvm_is_mmio_pfn(pfn)) {
+		ret = kvm_x86_ops.pin_page(vcpu, gfn, pfn, level, &spte);
+		if (ret) {
+			if (WARN_ON_ONCE(ret > 0))
+				/*
+				 * pin_page() should return 0 on success
+				 * and non-zero preferably less than zero,
+				 * for failure.  We check for any unanticipated
+				 * positive return values here.
+				 */
+				ret = -EINVAL;
+
+			return ret;
+		}
+	}
+
 	if (host_writable)
 		spte |= SPTE_HOST_WRITEABLE;
 	else
@@ -3086,6 +3102,9 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 
 	set_spte_ret = set_spte(vcpu, sptep, pte_access, level, gfn, pfn,
 				speculative, true, host_writable);
+	if (set_spte_ret < 0)
+		return set_spte_ret;
+
 	if (set_spte_ret & SET_SPTE_WRITE_PROTECTED_PT) {
 		if (write_fault)
 			ret = RET_PF_EMULATE;
@@ -3134,7 +3153,7 @@ static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu,
 	struct page *pages[PTE_PREFETCH_NUM];
 	struct kvm_memory_slot *slot;
 	unsigned int access = sp->role.access;
-	int i, ret;
+	int i, ret, error_ret = 0;
 	gfn_t gfn;
 
 	gfn = kvm_mmu_page_get_gfn(sp, start - sp->spt);
@@ -3147,12 +3166,15 @@ static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu,
 		return -1;
 
 	for (i = 0; i < ret; i++, gfn++, start++) {
-		mmu_set_spte(vcpu, start, access, 0, sp->role.level, gfn,
-			     page_to_pfn(pages[i]), true, true);
+		ret = mmu_set_spte(vcpu, start, access, 0, sp->role.level, gfn,
+				   page_to_pfn(pages[i]), true, true);
+		if (ret < 0 && error_ret == 0) /* only track 1st fail */
+			error_ret = ret;
 		put_page(pages[i]);
 	}
 
-	return 0;
+	/* If there was an error for any gfn, return non-0. */
+	return error_ret;
 }
 
 static void __direct_pte_prefetch(struct kvm_vcpu *vcpu,
diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
index 0172a949f6a7..a777bb43dfa0 100644
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -532,6 +532,7 @@ FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
 	unsigned pte_access;
 	gfn_t gfn;
 	kvm_pfn_t pfn;
+	int set_spte_ret;
 
 	if (FNAME(prefetch_invalid_gpte)(vcpu, sp, spte, gpte))
 		return false;
@@ -550,11 +551,12 @@ FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
 	 * we call mmu_set_spte() with host_writable = true because
 	 * pte_prefetch_gfn_to_pfn always gets a writable pfn.
 	 */
-	mmu_set_spte(vcpu, spte, pte_access, 0, PG_LEVEL_4K, gfn, pfn,
-		     true, true);
+	set_spte_ret = mmu_set_spte(vcpu, spte, pte_access, 0,
+				    PG_LEVEL_4K, gfn, pfn, true, true);
 
 	kvm_release_pfn_clean(pfn);
-	return true;
+
+	return set_spte_ret >= 0;
 }
 
 static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
@@ -1011,7 +1013,8 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 	int i, nr_present = 0;
 	bool host_writable;
 	gpa_t first_pte_gpa;
-	int set_spte_ret = 0;
+	int ret;
+	int accum_set_spte_flags = 0;
 
 	/* direct kvm_mmu_page can not be unsync. */
 	BUG_ON(sp->role.direct);
@@ -1064,17 +1067,19 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 			continue;
 		}
 
-		nr_present++;
-
 		host_writable = sp->spt[i] & SPTE_HOST_WRITEABLE;
 
-		set_spte_ret |= set_spte(vcpu, &sp->spt[i],
-					 pte_access, PG_LEVEL_4K,
-					 gfn, spte_to_pfn(sp->spt[i]),
-					 true, false, host_writable);
+		ret = set_spte(vcpu, &sp->spt[i], pte_access,
+			       PG_LEVEL_4K, gfn,
+			       spte_to_pfn(sp->spt[i]), true, false,
+			       host_writable);
+		if (ret >= 0) {
+			nr_present++;
+			accum_set_spte_flags |= ret;
+		}
 	}
 
-	if (set_spte_ret & SET_SPTE_NEED_REMOTE_TLB_FLUSH)
+	if (accum_set_spte_flags & SET_SPTE_NEED_REMOTE_TLB_FLUSH)
 		kvm_flush_remote_tlbs(vcpu->kvm);
 
 	return nr_present;
-- 
2.17.1