Message-ID: <69c81e09-eb73-060d-2429-ea3a4e0c1e9a@linux.intel.com>
Date: Wed, 26 Apr 2023 09:46:47 +0800
From: Binbin Wu <binbin.wu@...ux.intel.com>
To: Zeng Guang <guang.zeng@...el.com>,
Paolo Bonzini <pbonzini@...hat.com>,
"Christopherson,, Sean" <seanjc@...gle.com>,
Thomas Gleixner <tglx@...utronix.de>,
Ingo Molnar <mingo@...hat.com>, Borislav Petkov <bp@...en8.de>,
Dave Hansen <dave.hansen@...ux.intel.com>,
H Peter Anvin <hpa@...or.com>,
"kvm@...r.kernel.org" <kvm@...r.kernel.org>
Cc: "x86@...nel.org" <x86@...nel.org>,
"linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
"Gao, Chao" <chao.gao@...el.com>
Subject: Re: [PATCH 2/6] KVM: VMX: Add new ops in kvm_x86_ops for LASS
violation check
On 4/25/2023 11:26 AM, Zeng Guang wrote:
>
> On 4/24/2023 3:43 PM, Binbin Wu wrote:
>>
[...]
>> On 4/20/2023 9:37 PM, Zeng Guang wrote:
>>
>>> diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
>>> index abccd51dcfca..f76c07f2674b 100644
>>> --- a/arch/x86/include/asm/kvm-x86-ops.h
>>> +++ b/arch/x86/include/asm/kvm-x86-ops.h
>>> @@ -131,6 +131,7 @@ KVM_X86_OP(msr_filter_changed)
>>> KVM_X86_OP(complete_emulated_msr)
>>> KVM_X86_OP(vcpu_deliver_sipi_vector)
>>> KVM_X86_OP_OPTIONAL_RET0(vcpu_get_apicv_inhibit_reasons);
>>> +KVM_X86_OP_OPTIONAL_RET0(check_lass);
>>> #undef KVM_X86_OP
>>> #undef KVM_X86_OP_OPTIONAL
>>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
>>> index 8ff89a52ef66..31fb8699a1ff 100644
>>> --- a/arch/x86/include/asm/kvm_host.h
>>> +++ b/arch/x86/include/asm/kvm_host.h
>>> @@ -69,6 +69,9 @@
>>> #define KVM_X86_NOTIFY_VMEXIT_VALID_BITS (KVM_X86_NOTIFY_VMEXIT_ENABLED | \
>>> KVM_X86_NOTIFY_VMEXIT_USER)
>>>
>>> +/* x86-specific emulation flags */
>>> +#define KVM_X86_EMULFLAG_SKIP_LASS _BITULL(1)
>> Do you use the flag outside of the emulator?
>> For the LAM patch, it's planned to move the flags inside the emulator.
> IMO, the detailed flag is implementation specific. Is it necessary to
> bind it to the emulator even though it's only used inside the emulator?
For the remaining part (i.e., the VM-exit handling), the code is already in
the vendor-specific implementations, and the callers there know whether to
skip the LASS check or not.
I plan to do a cleanup that consolidates the flags into one parameter for
__linearize(), and the consolidated flags value can then be extended for LAM
and other features (e.g., LASS).
I'm posting the proposed patch below; could you help check whether it is OK
for LASS to follow?
arch/x86/kvm/emulate.c | 20 ++++++++++++++------
arch/x86/kvm/kvm_emulate.h | 4 ++++
2 files changed, 18 insertions(+), 6 deletions(-)
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index a20bec931764..5fb516bc5731 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -687,8 +687,8 @@ static unsigned insn_alignment(struct x86_emulate_ctxt *ctxt, unsigned size)
static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
struct segmented_address addr,
unsigned *max_size, unsigned size,
- bool write, bool fetch,
- enum x86emul_mode mode, ulong *linear)
+ u64 flags, enum x86emul_mode mode,
+ ulong *linear)
{
struct desc_struct desc;
bool usable;
@@ -696,6 +696,8 @@ static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
u32 lim;
u16 sel;
u8 va_bits;
+ bool fetch = !!(flags & X86_EMULFLAG_FETCH);
+ bool write = !!(flags & X86_EMULFLAG_WRITE);
la = seg_base(ctxt, addr.seg) + addr.ea;
*max_size = 0;
@@ -757,7 +759,12 @@ static int linearize(struct x86_emulate_ctxt *ctxt,
ulong *linear)
{
unsigned max_size;
- return __linearize(ctxt, addr, &max_size, size, write, false,
+ u64 flags = 0;
+
+ if (write)
+ flags |= X86_EMULFLAG_WRITE;
+
+ return __linearize(ctxt, addr, &max_size, size, flags,
ctxt->mode, linear);
}
@@ -768,10 +775,11 @@ static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst)
unsigned max_size;
struct segmented_address addr = { .seg = VCPU_SREG_CS,
.ea = dst };
+ u64 flags = X86_EMULFLAG_FETCH;
if (ctxt->op_bytes != sizeof(unsigned long))
addr.ea = dst & ((1UL << (ctxt->op_bytes << 3)) - 1);
- rc = __linearize(ctxt, addr, &max_size, 1, false, true, ctxt->mode, &linear);
+ rc = __linearize(ctxt, addr, &max_size, 1, flags, ctxt->mode, &linear);
if (rc == X86EMUL_CONTINUE)
ctxt->_eip = addr.ea;
return rc;
@@ -896,6 +904,7 @@ static int __do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, int op_size)
int cur_size = ctxt->fetch.end - ctxt->fetch.data;
struct segmented_address addr = { .seg = VCPU_SREG_CS,
.ea = ctxt->eip + cur_size };
+ u64 flags = X86_EMULFLAG_FETCH;
/*
* We do not know exactly how many bytes will be needed, and
@@ -907,8 +916,7 @@ static int __do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, int op_size)
* boundary check itself. Instead, we use max_size to check
* against op_size.
*/
- rc = __linearize(ctxt, addr, &max_size, 0, false, true, ctxt->mode,
- &linear);
+ rc = __linearize(ctxt, addr, &max_size, 0, flags, ctxt->mode, &linear);
if (unlikely(rc != X86EMUL_CONTINUE))
return rc;
diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h
index ab65f3a47dfd..5451a37f135f 100644
--- a/arch/x86/kvm/kvm_emulate.h
+++ b/arch/x86/kvm/kvm_emulate.h
@@ -88,6 +88,10 @@ struct x86_instruction_info {
#define X86EMUL_IO_NEEDED 5 /* IO is needed to complete emulation */
#define X86EMUL_INTERCEPTED 6 /* Intercepted by nested VMCB/VMCS */
+/* x86-specific emulation flags */
+#define X86_EMULFLAG_FETCH _BITULL(0)
+#define X86_EMULFLAG_WRITE _BITULL(1)
+
struct x86_emulate_ops {
void (*vm_bugged)(struct x86_emulate_ctxt *ctxt);
/*
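
On top of this consolidation, LASS could then take its own bit in the same
flags word. This is a sketch of the direction only: the bit value, the name
X86_EMULFLAG_SKIP_LASS, the emul_lass_violation() helper, and the
->check_lass() emulator op below are all hypothetical and would be defined
by the LASS series itself.

/* hypothetical: a skip-LASS bit next to X86_EMULFLAG_FETCH/_WRITE */
#define X86_EMULFLAG_SKIP_LASS _BITULL(2)

/*
 * Hypothetical helper for __linearize(): honor the skip bit; otherwise ask
 * the vendor implementation (via a new ->check_lass() emulator op) whether
 * the linear address would violate LASS.
 */
static inline bool emul_lass_violation(struct x86_emulate_ctxt *ctxt,
				       u64 access, ulong la, u64 flags)
{
	if (flags & X86_EMULFLAG_SKIP_LASS)
		return false;

	return ctxt->ops->check_lass(ctxt, access, la, flags);
}

__linearize() could then fail with emulate_gp(ctxt, 0) whenever the helper
reports a violation.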
>>> +
>>> /* x86-specific vcpu->requests bit members */
>>> #define KVM_REQ_MIGRATE_TIMER KVM_ARCH_REQ(0)
>>> #define KVM_REQ_REPORT_TPR_ACCESS KVM_ARCH_REQ(1)
>>> @@ -1706,6 +1709,8 @@ struct kvm_x86_ops {
>>> * Returns vCPU specific APICv inhibit reasons
>>> */
>>> unsigned long (*vcpu_get_apicv_inhibit_reasons)(struct kvm_vcpu *vcpu);
>>> +
>>> + bool (*check_lass)(struct kvm_vcpu *vcpu, u64 access, u64 la, u64 flags);
>> The flags may be dropped if the caller knows whether to skip the check or not.
> Probably I don't get you right. Do you mean it needs to define another
> function without flags?
>
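Not necessarily a separate function. I mean the op itself could drop the
flags parameter, since each call site already knows whether the LASS check
applies and can simply avoid invoking the op. Roughly (a sketch only, not
part of any posted patch):

	/* sketch: op without flags; callers that must skip LASS don't call it */
	bool (*check_lass)(struct kvm_vcpu *vcpu, u64 access, u64 la);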
>>> };
>>> struct kvm_x86_nested_ops {
>>> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
>>> index c923d7599d71..581327ede66a 100644
>>> --- a/arch/x86/kvm/vmx/vmx.c
>>> +++ b/arch/x86/kvm/vmx/vmx.c
>>> @@ -8070,6 +8070,59 @@ static void vmx_vm_destroy(struct kvm *kvm)
>>> free_pages((unsigned long)kvm_vmx->pid_table,
>>> vmx_get_pid_table_order(kvm));
>>> }
>>> +/*
>>> + * Determine whether an access to the linear address causes a LASS violation.
>>> + * LASS protection is only effective in long mode. As a prerequisite, caller
>>> + * should make sure VM
>> Should be vCPU?
> Similar meaning, I think. :)
>>> running in long mode and invoke this api to do LASS
>>> + * violation check.
>>> + */
>>> +bool __vmx_check_lass(struct kvm_vcpu *vcpu, u64 access, u64 la, u64 flags)
>>> +{
>>> + bool user_mode, user_as, rflags_ac;
>>> +
>>> + if (!!(flags & KVM_X86_EMULFLAG_SKIP_LASS) ||
>>> + !kvm_is_cr4_bit_set(vcpu, X86_CR4_LASS))
>>> + return false;
>>> +
>>> + WARN_ON_ONCE(!is_long_mode(vcpu));
>>> +
>>> + user_as = !(la >> 63);
>>> +
>>> + /*
>>> + * An access is a supervisor-mode access if CPL < 3 or if it implicitly
>>> + * accesses a system data structure. For implicit accesses to system
>>> + * data structure, the processor acts as if RFLAGS.AC is clear.
>>> + */
>>> + if (access & PFERR_IMPLICIT_ACCESS) {
>>> + user_mode = false;
>>> + rflags_ac = false;
>>> + } else {
>>> + user_mode = vmx_get_cpl(vcpu) == 3;
>>> + if (!user_mode)
>>> + rflags_ac = !!(kvm_get_rflags(vcpu) & X86_EFLAGS_AC);
>>> + }
>>> +
>>> + if (user_mode != user_as) {
>>> + /*
>>> + * Supervisor-mode _data_ accesses to user address space
>>> + * cause LASS violations only if SMAP is enabled.
>>> + */
>>> + if (!user_mode && !(access & PFERR_FETCH_MASK)) {
>>> + return kvm_is_cr4_bit_set(vcpu, X86_CR4_SMAP) &&
>>> + !rflags_ac;
>>> + } else {
>>> + return true;
>>> + }
>>> + }
>>> +
>>> + return false;
>>> +}
>>> +
>>> +static bool vmx_check_lass(struct kvm_vcpu *vcpu, u64 access, u64 la, u64 flags)
>>> +{
>>> + return is_long_mode(vcpu) && __vmx_check_lass(vcpu, access, la, flags);
>>> +}
>>> +
>>> static struct kvm_x86_ops vmx_x86_ops __initdata = {
>>> .name = "kvm_intel",
>>> @@ -8207,6 +8260,8 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
>>> .complete_emulated_msr = kvm_complete_insn_gp,
>>> .vcpu_deliver_sipi_vector = kvm_vcpu_deliver_sipi_vector,
>>> +
>>> + .check_lass = vmx_check_lass,
>>> };
>>> static unsigned int vmx_handle_intel_pt_intr(void)
>>> diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
>>> index a3da84f4ea45..6569385a5978 100644
>>> --- a/arch/x86/kvm/vmx/vmx.h
>>> +++ b/arch/x86/kvm/vmx/vmx.h
>>> @@ -433,6 +433,8 @@ void vmx_enable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type);
>>> u64 vmx_get_l2_tsc_offset(struct kvm_vcpu *vcpu);
>>> u64 vmx_get_l2_tsc_multiplier(struct kvm_vcpu *vcpu);
>>> +bool __vmx_check_lass(struct kvm_vcpu *vcpu, u64 access, u64 la, u64 flags);
>>> +
>>> static inline void vmx_set_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr,
>>> int type, bool value)
>>> {
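
For reference, my reading of the rules __vmx_check_lass() implements,
assuming CR4.LASS = 1 and the vCPU in long mode (this just summarizes the
code above):

  accessor                   target address space     LASS violation?
  -------------------------  -----------------------  ------------------------
  user mode (CPL = 3)        supervisor (bit 63 = 1)  yes
  user mode (CPL = 3)        user (bit 63 = 0)        no
  supervisor, instr. fetch   user (bit 63 = 0)        yes
  supervisor, data access    user (bit 63 = 0)        only if CR4.SMAP = 1 and
                                                      RFLAGS.AC = 0 (implicit
                                                      accesses always act as
                                                      if RFLAGS.AC = 0)
  supervisor, any access     supervisor (bit 63 = 1)  no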