lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1536234182-2809-6-git-send-email-brijesh.singh@amd.com>
Date:   Thu,  6 Sep 2018 06:43:02 -0500
From:   Brijesh Singh <brijesh.singh@....com>
To:     x86@...nel.org, linux-kernel@...r.kernel.org, kvm@...r.kernel.org
Cc:     Brijesh Singh <brijesh.singh@....com>,
        Tom Lendacky <thomas.lendacky@....com>,
        Thomas Gleixner <tglx@...utronix.de>,
        Borislav Petkov <bp@...e.de>, "H. Peter Anvin" <hpa@...or.com>,
        Paolo Bonzini <pbonzini@...hat.com>,
        Sean Christopherson <sean.j.christopherson@...el.com>,
        Radim Krčmář <rkrcmar@...hat.com>
Subject: [PATCH v5 5/5] x86/kvm: Avoid dynamic allocation of pvclock data when SEV is active

Currently, the per-cpu pvclock data is allocated dynamically when
cpu >= HVC_BOOT_ARRAY_SIZE. The physical address of this variable is
shared between the guest and the hypervisor, hence it must be mapped as
unencrypted (i.e. C=0) when SEV is active.

When SEV is active, we will be wasting a fairly sizeable amount of memory,
since each CPU will be doing a separate 4k allocation so that it can clear
the C-bit. Let's define a few extra static page-sized arrays of pvclock data.
In the preparatory stage of CPU hotplug, use an element of this static
array to avoid the dynamic allocation. This array will be put in the
.data..decrypted_hvclock section so that it is mapped with C=0 during boot.

In the non-SEV case, these static pages will be unused and are freed by
free_decrypted_mem().

Signed-off-by: Brijesh Singh <brijesh.singh@....com>
Suggested-by: Sean Christopherson <sean.j.christopherson@...el.com>
Cc: Tom Lendacky <thomas.lendacky@....com>
Cc: kvm@...r.kernel.org
Cc: Thomas Gleixner <tglx@...utronix.de>
Cc: Borislav Petkov <bp@...e.de>
Cc: "H. Peter Anvin" <hpa@...or.com>
Cc: linux-kernel@...r.kernel.org
Cc: Paolo Bonzini <pbonzini@...hat.com>
Cc: Sean Christopherson <sean.j.christopherson@...el.com>
Cc: kvm@...r.kernel.org
Cc: "Radim Krčmář" <rkrcmar@...hat.com>
---
 arch/x86/include/asm/mem_encrypt.h |  4 ++++
 arch/x86/kernel/kvmclock.c         | 22 +++++++++++++++++++---
 arch/x86/kernel/vmlinux.lds.S      |  3 +++
 arch/x86/mm/init.c                 |  3 +++
 arch/x86/mm/mem_encrypt.c          | 10 ++++++++++
 5 files changed, 39 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h
index 802b2eb..aa204af 100644
--- a/arch/x86/include/asm/mem_encrypt.h
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -48,11 +48,13 @@ int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size);
 
 /* Architecture __weak replacement functions */
 void __init mem_encrypt_init(void);
+void __init free_decrypted_mem(void);
 
 bool sme_active(void);
 bool sev_active(void);
 
 #define __decrypted __attribute__((__section__(".data..decrypted")))
+#define __decrypted_hvclock __attribute__((__section__(".data..decrypted_hvclock")))
 
 #else	/* !CONFIG_AMD_MEM_ENCRYPT */
 
@@ -80,6 +82,7 @@ static inline int __init
 early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 0; }
 
 #define __decrypted
+#define __decrypted_hvclock
 
 #endif	/* CONFIG_AMD_MEM_ENCRYPT */
 
@@ -93,6 +96,7 @@ early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 0;
 #define __sme_pa_nodebug(x)	(__pa_nodebug(x) | sme_me_mask)
 
 extern char __start_data_decrypted[], __end_data_decrypted[];
+extern char __start_data_decrypted_hvclock[];
 
 #endif	/* __ASSEMBLY__ */
 
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 376fd3a..5b88773 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -65,6 +65,13 @@ static struct pvclock_vsyscall_time_info
 static struct pvclock_wall_clock wall_clock __decrypted;
 static DEFINE_PER_CPU(struct pvclock_vsyscall_time_info *, hv_clock_per_cpu);
 
+
+/* This should cover up to 512 VCPUs (first 64 are covered by hv_clock_boot[]). */
+#define HVC_DECRYPTED_ARRAY_SIZE \
+	((PAGE_SIZE * 7)  / sizeof(struct pvclock_vsyscall_time_info))
+static struct pvclock_vsyscall_time_info
+			hv_clock_dec[HVC_DECRYPTED_ARRAY_SIZE] __decrypted_hvclock;
+
 static inline struct pvclock_vcpu_time_info *this_cpu_pvti(void)
 {
 	return &this_cpu_read(hv_clock_per_cpu)->pvti;
@@ -267,10 +274,19 @@ static int kvmclock_setup_percpu(unsigned int cpu)
 		return 0;
 
 	/* Use the static page for the first CPUs, allocate otherwise */
-	if (cpu < HVC_BOOT_ARRAY_SIZE)
+	if (cpu < HVC_BOOT_ARRAY_SIZE) {
 		p = &hv_clock_boot[cpu];
-	else
-		p = kzalloc(sizeof(*p), GFP_KERNEL);
+	} else {
+		/*
+		 * When SEV is active, use the static pages from
+		 * .data..decrypted_hvclock section. The pages are already
+		 * mapped with C=0.
+		 */
+		if (sev_active())
+			p = &hv_clock_dec[cpu - HVC_BOOT_ARRAY_SIZE];
+		else
+			p = kzalloc(sizeof(*p), GFP_KERNEL);
+	}
 
 	per_cpu(hv_clock_per_cpu, cpu) = p;
 	return p ? 0 : -ENOMEM;
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 78d3169..1aec291 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -101,6 +101,9 @@ PHDRS {
 	. = ALIGN(PMD_SIZE);					\
 	__start_data_decrypted = .;				\
 	*(.data..decrypted);					\
+	. = ALIGN(PAGE_SIZE);					\
+	__start_data_decrypted_hvclock = .;			\
+	*(.data..decrypted_hvclock);				\
 	. = ALIGN(PMD_SIZE);					\
 	__end_data_decrypted = .;				\
 
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 7a8fc26..052b279 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -815,9 +815,12 @@ void free_kernel_image_pages(void *begin, void *end)
 		set_memory_np_noalias(begin_ul, len_pages);
 }
 
+void __weak free_decrypted_mem(void) { }
+
 void __ref free_initmem(void)
 {
 	e820__reallocate_tables();
+	free_decrypted_mem();
 
 	free_kernel_image_pages(&__init_begin, &__init_end);
 }
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index b2de398..865b1ad 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -348,6 +348,16 @@ bool sev_active(void)
 EXPORT_SYMBOL(sev_active);
 
 /* Architecture __weak replacement functions */
+void __init free_decrypted_mem(void)
+{
+	if (mem_encrypt_active())
+		return;
+
+	free_init_pages("unused decrypted",
+			(unsigned long)__start_data_decrypted_hvclock,
+			(unsigned long)__end_data_decrypted);
+}
+
 void __init mem_encrypt_init(void)
 {
 	if (!sme_me_mask)
-- 
2.7.4

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ