[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20220124150215.36893-25-kirill.shutemov@linux.intel.com>
Date: Mon, 24 Jan 2022 18:02:10 +0300
From: "Kirill A. Shutemov" <kirill.shutemov@...ux.intel.com>
To: tglx@...utronix.de, mingo@...hat.com, bp@...en8.de,
dave.hansen@...el.com, luto@...nel.org, peterz@...radead.org
Cc: sathyanarayanan.kuppuswamy@...ux.intel.com, aarcange@...hat.com,
ak@...ux.intel.com, dan.j.williams@...el.com, david@...hat.com,
hpa@...or.com, jgross@...e.com, jmattson@...gle.com,
joro@...tes.org, jpoimboe@...hat.com, knsathya@...nel.org,
pbonzini@...hat.com, sdeep@...are.com, seanjc@...gle.com,
tony.luck@...el.com, vkuznets@...hat.com, wanpengli@...cent.com,
x86@...nel.org, linux-kernel@...r.kernel.org,
"Kirill A. Shutemov" <kirill.shutemov@...ux.intel.com>,
Sean Christopherson <sean.j.christopherson@...el.com>,
Kai Huang <kai.huang@...ux.intel.com>
Subject: [PATCHv2 24/29] x86/mm/cpa: Add support for TDX shared memory
TDX steals a bit from the physical address and uses it to indicate
whether the page is private to the guest (bit set 0) or unprotected
and shared with the VMM (bit set 1).
AMD SEV uses a similar scheme, repurposing a bit from the physical address
to indicate encrypted or decrypted pages.
The kernel already has the infrastructure to deal with encrypted/decrypted
pages for AMD SEV. Modify the __set_memory_enc_pgtable() and make it
aware about TDX.
After modifying page table entries, the kernel needs to notify VMM about
the change with tdx_hcall_request_gpa_type().
Co-developed-by: Sean Christopherson <sean.j.christopherson@...el.com>
Signed-off-by: Sean Christopherson <sean.j.christopherson@...el.com>
Co-developed-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@...ux.intel.com>
Signed-off-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@...ux.intel.com>
Tested-by: Kai Huang <kai.huang@...ux.intel.com>
Reviewed-by: Andi Kleen <ak@...ux.intel.com>
Reviewed-by: Tony Luck <tony.luck@...el.com>
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@...ux.intel.com>
---
arch/x86/Kconfig | 2 +-
arch/x86/include/asm/mem_encrypt.h | 8 ++++++
arch/x86/include/asm/set_memory.h | 1 -
arch/x86/kernel/cc_platform.c | 2 ++
arch/x86/mm/mem_encrypt_amd.c | 10 ++++---
arch/x86/mm/pat/set_memory.c | 44 ++++++++++++++++++++++++++----
include/linux/cc_platform.h | 9 ++++++
7 files changed, 64 insertions(+), 12 deletions(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 680c3cad9422..33e6ec6fd89f 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -886,7 +886,7 @@ config INTEL_TDX_GUEST
depends on X86_X2APIC
select ARCH_HAS_CC_PLATFORM
select X86_MCE
- select DYNAMIC_PHYSICAL_MASK
+ select X86_MEM_ENCRYPT
help
Support running as a guest under Intel TDX. Without this support,
the guest kernel can not boot or run under TDX.
diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h
index e2c6f433ed10..f45a9ea2dec9 100644
--- a/arch/x86/include/asm/mem_encrypt.h
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -52,6 +52,8 @@ void __init mem_encrypt_free_decrypted_mem(void);
/* Architecture __weak replacement functions */
void __init mem_encrypt_init(void);
+int amd_notify_range_enc_status_changed(unsigned long vaddr, int npages, bool enc);
+
void __init sev_es_init_vc_handling(void);
#define __bss_decrypted __section(".bss..decrypted")
@@ -85,6 +87,12 @@ early_set_mem_enc_dec_hypercall(unsigned long vaddr, int npages, bool enc) {}
static inline void mem_encrypt_free_decrypted_mem(void) { }
+static inline int amd_notify_range_enc_status_changed(unsigned long vaddr,
+ int npages, bool enc)
+{
+ return 0;
+}
+
#define __bss_decrypted
#endif /* CONFIG_AMD_MEM_ENCRYPT */
diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h
index ff0f2d90338a..ce8dd215f5b3 100644
--- a/arch/x86/include/asm/set_memory.h
+++ b/arch/x86/include/asm/set_memory.h
@@ -84,7 +84,6 @@ int set_pages_rw(struct page *page, int numpages);
int set_direct_map_invalid_noflush(struct page *page);
int set_direct_map_default_noflush(struct page *page);
bool kernel_page_present(struct page *page);
-void notify_range_enc_status_changed(unsigned long vaddr, int npages, bool enc);
extern int kernel_set_to_readonly;
diff --git a/arch/x86/kernel/cc_platform.c b/arch/x86/kernel/cc_platform.c
index be8722ad4792..1fbcf19fa20d 100644
--- a/arch/x86/kernel/cc_platform.c
+++ b/arch/x86/kernel/cc_platform.c
@@ -21,6 +21,8 @@ static bool intel_cc_platform_has(enum cc_attr attr)
switch (attr) {
case CC_ATTR_GUEST_UNROLL_STRING_IO:
case CC_ATTR_HOTPLUG_DISABLED:
+ case CC_ATTR_GUEST_TDX:
+ case CC_ATTR_GUEST_MEM_ENCRYPT:
return true;
default:
return false;
diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c
index 2b2d018ea345..6aa4e0c27368 100644
--- a/arch/x86/mm/mem_encrypt_amd.c
+++ b/arch/x86/mm/mem_encrypt_amd.c
@@ -256,7 +256,8 @@ static unsigned long pg_level_to_pfn(int level, pte_t *kpte, pgprot_t *ret_prot)
return pfn;
}
-void notify_range_enc_status_changed(unsigned long vaddr, int npages, bool enc)
+int amd_notify_range_enc_status_changed(unsigned long vaddr, int npages,
+ bool enc)
{
#ifdef CONFIG_PARAVIRT
unsigned long sz = npages << PAGE_SHIFT;
@@ -270,7 +271,7 @@ void notify_range_enc_status_changed(unsigned long vaddr, int npages, bool enc)
kpte = lookup_address(vaddr, &level);
if (!kpte || pte_none(*kpte)) {
WARN_ONCE(1, "kpte lookup for vaddr\n");
- return;
+ return 0;
}
pfn = pg_level_to_pfn(level, kpte, NULL);
@@ -285,6 +286,7 @@ void notify_range_enc_status_changed(unsigned long vaddr, int npages, bool enc)
vaddr = (vaddr & pmask) + psize;
}
#endif
+ return 0;
}
static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc)
@@ -392,7 +394,7 @@ static int __init early_set_memory_enc_dec(unsigned long vaddr,
ret = 0;
- notify_range_enc_status_changed(start, PAGE_ALIGN(size) >> PAGE_SHIFT, enc);
+ amd_notify_range_enc_status_changed(start, PAGE_ALIGN(size) >> PAGE_SHIFT, enc);
out:
__flush_tlb_all();
return ret;
@@ -410,7 +412,7 @@ int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size)
void __init early_set_mem_enc_dec_hypercall(unsigned long vaddr, int npages, bool enc)
{
- notify_range_enc_status_changed(vaddr, npages, enc);
+ amd_notify_range_enc_status_changed(vaddr, npages, enc);
}
void __init mem_encrypt_free_decrypted_mem(void)
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index b4072115c8ef..06c65689d6fb 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -32,6 +32,7 @@
#include <asm/set_memory.h>
#include <asm/hyperv-tlfs.h>
#include <asm/mshyperv.h>
+#include <asm/tdx.h>
#include "../mm_internal.h"
@@ -1983,6 +1984,27 @@ int set_memory_global(unsigned long addr, int numpages)
__pgprot(_PAGE_GLOBAL), 0);
}
+static pgprot_t pgprot_cc_mask(bool enc)
+{
+ if (enc)
+ return pgprot_encrypted(__pgprot(0));
+ else
+ return pgprot_decrypted(__pgprot(0));
+}
+
+static int notify_range_enc_status_changed(unsigned long vaddr, int npages,
+ bool enc)
+{
+ if (cc_platform_has(CC_ATTR_GUEST_TDX)) {
+ phys_addr_t start = __pa(vaddr);
+ phys_addr_t end = __pa(vaddr + npages * PAGE_SIZE);
+
+ return tdx_hcall_request_gpa_type(start, end, enc);
+ } else {
+ return amd_notify_range_enc_status_changed(vaddr, npages, enc);
+ }
+}
+
/*
* __set_memory_enc_pgtable() is used for the hypervisors that get
* informed about "encryption" status via page tables.
@@ -1999,8 +2021,10 @@ static int __set_memory_enc_pgtable(unsigned long addr, int numpages, bool enc)
memset(&cpa, 0, sizeof(cpa));
cpa.vaddr = &addr;
cpa.numpages = numpages;
- cpa.mask_set = enc ? __pgprot(_PAGE_ENC) : __pgprot(0);
- cpa.mask_clr = enc ? __pgprot(0) : __pgprot(_PAGE_ENC);
+
+ cpa.mask_set = pgprot_cc_mask(enc);
+ cpa.mask_clr = pgprot_cc_mask(!enc);
+
cpa.pgd = init_mm.pgd;
/* Must avoid aliasing mappings in the highmem code */
@@ -2008,9 +2032,17 @@ static int __set_memory_enc_pgtable(unsigned long addr, int numpages, bool enc)
vm_unmap_aliases();
/*
- * Before changing the encryption attribute, we need to flush caches.
+ * Before changing the encryption attribute, flush caches.
+ *
+ * For TDX, guest is responsible for flushing caches on private->shared
+ * transition. VMM is responsible for flushing on shared->private.
*/
- cpa_flush(&cpa, !this_cpu_has(X86_FEATURE_SME_COHERENT));
+ if (cc_platform_has(CC_ATTR_GUEST_TDX)) {
+ if (!enc)
+ cpa_flush(&cpa, 1);
+ } else {
+ cpa_flush(&cpa, !this_cpu_has(X86_FEATURE_SME_COHERENT));
+ }
ret = __change_page_attr_set_clr(&cpa, 1);
@@ -2027,8 +2059,8 @@ static int __set_memory_enc_pgtable(unsigned long addr, int numpages, bool enc)
* Notify hypervisor that a given memory range is mapped encrypted
* or decrypted.
*/
- notify_range_enc_status_changed(addr, numpages, enc);
-
+ if (!ret)
+ ret = notify_range_enc_status_changed(addr, numpages, enc);
return ret;
}
diff --git a/include/linux/cc_platform.h b/include/linux/cc_platform.h
index 691494bbaf5a..16c0ad925bf0 100644
--- a/include/linux/cc_platform.h
+++ b/include/linux/cc_platform.h
@@ -82,6 +82,15 @@ enum cc_attr {
* Examples include TDX Guest.
*/
CC_ATTR_HOTPLUG_DISABLED,
+
+ /**
+ * @CC_ATTR_GUEST_TDX: Trust Domain Extension Support
+ *
+ * The platform/OS is running as a TDX guest/virtual machine.
+ *
+ * Examples include Intel TDX.
+ */
+ CC_ATTR_GUEST_TDX = 0x100,
};
#ifdef CONFIG_ARCH_HAS_CC_PLATFORM
--
2.34.1
Powered by blists - more mailing lists