[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20220511022751.65540-2-kirill.shutemov@linux.intel.com>
Date: Wed, 11 May 2022 05:27:41 +0300
From: "Kirill A. Shutemov" <kirill.shutemov@...ux.intel.com>
To: Dave Hansen <dave.hansen@...ux.intel.com>,
Andy Lutomirski <luto@...nel.org>,
Peter Zijlstra <peterz@...radead.org>
Cc: x86@...nel.org, Andrey Ryabinin <aryabinin@...tuozzo.com>,
Alexander Potapenko <glider@...gle.com>,
Dmitry Vyukov <dvyukov@...gle.com>,
"H . J . Lu" <hjl.tools@...il.com>,
Andi Kleen <ak@...ux.intel.com>,
Rick Edgecombe <rick.p.edgecombe@...el.com>,
linux-mm@...ck.org, linux-kernel@...r.kernel.org,
"Kirill A. Shutemov" <kirill.shutemov@...ux.intel.com>
Subject: [PATCH] x86: Implement Linear Address Masking support
Linear Address Masking feature makes CPU ignore some bits of the virtual
address. These bits can be used to encode metadata.
The feature is enumerated with CPUID.(EAX=07H, ECX=01H):EAX.LAM[bit 26].
CR3.LAM_U57[bit 62] allows to encode 6 bits of metadata in bits 62:57 of
user pointers.
CR3.LAM_U48[bit 61] allows to encode 15 bits of metadata in bits 62:48
of user pointers.
CR4.LAM_SUP[bit 28] allows to encode metadata of supervisor pointers.
If 5-level paging is in use, 6 bits of metadata can be encoded in 62:57.
For 4-level paging, 15 bits of metadata can be encoded in bits 62:48.
QEMU strips address from the metadata bits and gets it to canonical
shape before handling memory access. It has to be done very early before
TLB lookup.
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@...ux.intel.com>
---
accel/tcg/cputlb.c | 20 +++++++++++++++++---
include/hw/core/tcg-cpu-ops.h | 5 +++++
target/i386/cpu.c | 4 ++--
target/i386/cpu.h | 26 +++++++++++++++++++++++++-
target/i386/helper.c | 2 +-
target/i386/tcg/helper-tcg.h | 1 +
target/i386/tcg/sysemu/excp_helper.c | 28 +++++++++++++++++++++++++++-
target/i386/tcg/sysemu/misc_helper.c | 3 +--
target/i386/tcg/sysemu/svm_helper.c | 3 +--
target/i386/tcg/tcg-cpu.c | 1 +
10 files changed, 81 insertions(+), 12 deletions(-)
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index 2035b2ac0ac0..15eff0df39c1 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -1295,6 +1295,17 @@ static inline ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
return ram_addr;
}
+static vaddr clean_addr(CPUArchState *env, vaddr addr)
+{
+ CPUClass *cc = CPU_GET_CLASS(env_cpu(env));
+
+ if (cc->tcg_ops->do_clean_addr) {
+ addr = cc->tcg_ops->do_clean_addr(env_cpu(env), addr);
+ }
+
+ return addr;
+}
+
/*
* Note: tlb_fill() can trigger a resize of the TLB. This means that all of the
* caller's prior references to the TLB table (e.g. CPUTLBEntry pointers) must
@@ -1757,10 +1768,11 @@ bool tlb_plugin_lookup(CPUState *cpu, target_ulong addr, int mmu_idx,
*
* @prot may be PAGE_READ, PAGE_WRITE, or PAGE_READ|PAGE_WRITE.
*/
-static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
+static void *atomic_mmu_lookup(CPUArchState *env, target_ulong address,
MemOpIdx oi, int size, int prot,
uintptr_t retaddr)
{
+ target_ulong addr = clean_addr(env, address);
size_t mmu_idx = get_mmuidx(oi);
MemOp mop = get_memop(oi);
int a_bits = get_alignment_bits(mop);
@@ -1904,10 +1916,11 @@ load_memop(const void *haddr, MemOp op)
}
static inline uint64_t QEMU_ALWAYS_INLINE
-load_helper(CPUArchState *env, target_ulong addr, MemOpIdx oi,
+load_helper(CPUArchState *env, target_ulong address, MemOpIdx oi,
uintptr_t retaddr, MemOp op, bool code_read,
FullLoadHelper *full_load)
{
+ target_ulong addr = clean_addr(env, address);
uintptr_t mmu_idx = get_mmuidx(oi);
uintptr_t index = tlb_index(env, mmu_idx, addr);
CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
@@ -2307,9 +2320,10 @@ store_helper_unaligned(CPUArchState *env, target_ulong addr, uint64_t val,
}
static inline void QEMU_ALWAYS_INLINE
-store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
+store_helper(CPUArchState *env, target_ulong address, uint64_t val,
MemOpIdx oi, uintptr_t retaddr, MemOp op)
{
+ target_ulong addr = clean_addr(env, address);
uintptr_t mmu_idx = get_mmuidx(oi);
uintptr_t index = tlb_index(env, mmu_idx, addr);
CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
diff --git a/include/hw/core/tcg-cpu-ops.h b/include/hw/core/tcg-cpu-ops.h
index e13898553aff..8e81f45510bf 100644
--- a/include/hw/core/tcg-cpu-ops.h
+++ b/include/hw/core/tcg-cpu-ops.h
@@ -82,6 +82,11 @@ struct TCGCPUOps {
MMUAccessType access_type,
int mmu_idx, uintptr_t retaddr) QEMU_NORETURN;
+ /**
+ * @do_clean_addr: Callback for clearing metadata/tags from the address.
+ */
+ vaddr (*do_clean_addr)(CPUState *cpu, vaddr addr);
+
/**
* @adjust_watchpoint_address: hack for cpu_check_watchpoint used by ARM
*/
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index cb6b5467d067..6e3e8473bf04 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -662,7 +662,7 @@ void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1,
/* CPUID_7_0_ECX_OSPKE is dynamic */ \
CPUID_7_0_ECX_LA57 | CPUID_7_0_ECX_PKS)
#define TCG_7_0_EDX_FEATURES 0
-#define TCG_7_1_EAX_FEATURES 0
+#define TCG_7_1_EAX_FEATURES CPUID_7_1_EAX_LAM
#define TCG_APM_FEATURES 0
#define TCG_6_EAX_FEATURES CPUID_6_EAX_ARAT
#define TCG_XSAVE_FEATURES (CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XGETBV1)
@@ -876,7 +876,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL,
+ NULL, NULL, "lam", NULL,
NULL, NULL, NULL, NULL,
},
.cpuid = {
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 982c5323537c..5d6cc8efb7da 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -232,6 +232,9 @@ typedef enum X86Seg {
#define CR0_CD_MASK (1U << 30)
#define CR0_PG_MASK (1U << 31)
+#define CR3_LAM_U57 (1ULL << 61)
+#define CR3_LAM_U48 (1ULL << 62)
+
#define CR4_VME_MASK (1U << 0)
#define CR4_PVI_MASK (1U << 1)
#define CR4_TSD_MASK (1U << 2)
@@ -255,6 +258,7 @@ typedef enum X86Seg {
#define CR4_SMAP_MASK (1U << 21)
#define CR4_PKE_MASK (1U << 22)
#define CR4_PKS_MASK (1U << 24)
+#define CR4_LAM_SUP (1U << 28)
#define CR4_RESERVED_MASK \
(~(target_ulong)(CR4_VME_MASK | CR4_PVI_MASK | CR4_TSD_MASK \
@@ -263,7 +267,8 @@ typedef enum X86Seg {
| CR4_OSFXSR_MASK | CR4_OSXMMEXCPT_MASK | CR4_UMIP_MASK \
| CR4_LA57_MASK \
| CR4_FSGSBASE_MASK | CR4_PCIDE_MASK | CR4_OSXSAVE_MASK \
- | CR4_SMEP_MASK | CR4_SMAP_MASK | CR4_PKE_MASK | CR4_PKS_MASK))
+ | CR4_SMEP_MASK | CR4_SMAP_MASK | CR4_PKE_MASK | CR4_PKS_MASK \
+ | CR4_LAM_SUP))
#define DR6_BD (1 << 13)
#define DR6_BS (1 << 14)
@@ -877,6 +882,8 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w,
#define CPUID_7_1_EAX_AVX_VNNI (1U << 4)
/* AVX512 BFloat16 Instruction */
#define CPUID_7_1_EAX_AVX512_BF16 (1U << 5)
+/* Linear Address Masking */
+#define CPUID_7_1_EAX_LAM (1U << 26)
/* XFD Extend Feature Disabled */
#define CPUID_D_1_EAX_XFD (1U << 4)
@@ -2287,6 +2294,23 @@ static inline bool hyperv_feat_enabled(X86CPU *cpu, int feat)
return !!(cpu->hyperv_features & BIT(feat));
}
+static inline uint64_t cr3_reserved_bits(CPUX86State *env)
+{
+ uint64_t reserved_bits;
+
+ if (!(env->efer & MSR_EFER_LMA)) {
+ return 0;
+ }
+
+ reserved_bits = (~0ULL) << env_archcpu(env)->phys_bits;
+
+ if (env->features[FEAT_7_1_EAX] & CPUID_7_1_EAX_LAM) {
+ reserved_bits &= ~(CR3_LAM_U48 | CR3_LAM_U57);
+ }
+
+ return reserved_bits;
+}
+
static inline uint64_t cr4_reserved_bits(CPUX86State *env)
{
uint64_t reserved_bits = CR4_RESERVED_MASK;
diff --git a/target/i386/helper.c b/target/i386/helper.c
index fa409e9c44a8..f91ebab840d6 100644
--- a/target/i386/helper.c
+++ b/target/i386/helper.c
@@ -247,7 +247,7 @@ hwaddr x86_cpu_get_phys_page_attrs_debug(CPUState *cs, vaddr addr,
}
if (la57) {
- pml5e_addr = ((env->cr[3] & ~0xfff) +
+ pml5e_addr = ((env->cr[3] & PG_ADDRESS_MASK) +
(((addr >> 48) & 0x1ff) << 3)) & a20_mask;
pml5e = x86_ldq_phys(cs, pml5e_addr);
if (!(pml5e & PG_PRESENT_MASK)) {
diff --git a/target/i386/tcg/helper-tcg.h b/target/i386/tcg/helper-tcg.h
index 0a4401e917f9..03ab858598d2 100644
--- a/target/i386/tcg/helper-tcg.h
+++ b/target/i386/tcg/helper-tcg.h
@@ -51,6 +51,7 @@ void x86_cpu_record_sigsegv(CPUState *cs, vaddr addr,
bool x86_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
MMUAccessType access_type, int mmu_idx,
bool probe, uintptr_t retaddr);
+vaddr x86_cpu_clean_addr(CPUState *cpu, vaddr addr);
#endif
void breakpoint_handler(CPUState *cs);
diff --git a/target/i386/tcg/sysemu/excp_helper.c b/target/i386/tcg/sysemu/excp_helper.c
index e1b6d8868338..caaab413381b 100644
--- a/target/i386/tcg/sysemu/excp_helper.c
+++ b/target/i386/tcg/sysemu/excp_helper.c
@@ -64,7 +64,7 @@ static int mmu_translate(CPUState *cs, hwaddr addr, MMUTranslateFunc get_hphys_f
uint64_t pml4e_addr, pml4e;
if (la57) {
- pml5e_addr = ((cr3 & ~0xfff) +
+ pml5e_addr = ((cr3 & PG_ADDRESS_MASK) +
(((addr >> 48) & 0x1ff) << 3)) & a20_mask;
pml5e_addr = GET_HPHYS(cs, pml5e_addr, MMU_DATA_STORE, NULL);
pml5e = x86_ldq_phys(cs, pml5e_addr);
@@ -437,3 +437,29 @@ bool x86_cpu_tlb_fill(CPUState *cs, vaddr addr, int size,
}
return true;
}
+
+static inline int64_t sign_extend64(uint64_t value, int index)
+{
+ int shift = 63 - index;
+ return (int64_t)(value << shift) >> shift;
+}
+
+vaddr x86_cpu_clean_addr(CPUState *cs, vaddr addr)
+{
+ CPUX86State *env = &X86_CPU(cs)->env;
+ bool la57 = env->cr[4] & CR4_LA57_MASK;
+
+ if (addr >> 63) {
+ if (env->cr[4] & CR4_LAM_SUP) {
+ return sign_extend64(addr, la57 ? 56 : 47);
+ }
+ } else {
+ if (env->cr[3] & CR3_LAM_U57) {
+ return sign_extend64(addr, 56);
+ } else if (env->cr[3] & CR3_LAM_U48) {
+ return sign_extend64(addr, 47);
+ }
+ }
+
+ return addr;
+}
diff --git a/target/i386/tcg/sysemu/misc_helper.c b/target/i386/tcg/sysemu/misc_helper.c
index 3715c1e2625b..faeb4a16383c 100644
--- a/target/i386/tcg/sysemu/misc_helper.c
+++ b/target/i386/tcg/sysemu/misc_helper.c
@@ -97,8 +97,7 @@ void helper_write_crN(CPUX86State *env, int reg, target_ulong t0)
cpu_x86_update_cr0(env, t0);
break;
case 3:
- if ((env->efer & MSR_EFER_LMA) &&
- (t0 & ((~0ULL) << env_archcpu(env)->phys_bits))) {
+ if (t0 & cr3_reserved_bits(env)) {
cpu_vmexit(env, SVM_EXIT_ERR, 0, GETPC());
}
if (!(env->efer & MSR_EFER_LMA)) {
diff --git a/target/i386/tcg/sysemu/svm_helper.c b/target/i386/tcg/sysemu/svm_helper.c
index 2b6f450af959..cbd99f240bb8 100644
--- a/target/i386/tcg/sysemu/svm_helper.c
+++ b/target/i386/tcg/sysemu/svm_helper.c
@@ -287,8 +287,7 @@ void helper_vmrun(CPUX86State *env, int aflag, int next_eip_addend)
cpu_vmexit(env, SVM_EXIT_ERR, 0, GETPC());
}
new_cr3 = x86_ldq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, save.cr3));
- if ((env->efer & MSR_EFER_LMA) &&
- (new_cr3 & ((~0ULL) << cpu->phys_bits))) {
+ if (new_cr3 & cr3_reserved_bits(env)) {
cpu_vmexit(env, SVM_EXIT_ERR, 0, GETPC());
}
new_cr4 = x86_ldq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, save.cr4));
diff --git a/target/i386/tcg/tcg-cpu.c b/target/i386/tcg/tcg-cpu.c
index 6fdfdf959899..754454d19041 100644
--- a/target/i386/tcg/tcg-cpu.c
+++ b/target/i386/tcg/tcg-cpu.c
@@ -77,6 +77,7 @@ static const struct TCGCPUOps x86_tcg_ops = {
.record_sigsegv = x86_cpu_record_sigsegv,
#else
.tlb_fill = x86_cpu_tlb_fill,
+ .do_clean_addr = x86_cpu_clean_addr,
.do_interrupt = x86_cpu_do_interrupt,
.cpu_exec_interrupt = x86_cpu_exec_interrupt,
.debug_excp_handler = breakpoint_handler,
--
2.35.1
Powered by blists - more mailing lists