linux-kernel - Re: [PATCH 6/9] x86/kvm/mmu: make space for source data caching in struct kvm

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <87h8iky4cb.fsf@vitty.brq.redhat.com>
Date:   Thu, 20 Sep 2018 10:12:52 +0200
From:   Vitaly Kuznetsov <vkuznets@...hat.com>
To:     Sean Christopherson <sean.j.christopherson@...el.com>
Cc:     kvm@...r.kernel.org, Paolo Bonzini <pbonzini@...hat.com>,
        Radim Krčmář <rkrcmar@...hat.com>,
        linux-kernel@...r.kernel.org, Jim Mattson <jmattson@...gle.com>,
        Liran Alon <liran.alon@...cle.com>
Subject: Re: [PATCH 6/9] x86/kvm/mmu: make space for source data caching in struct kvm_mmu

Sean Christopherson <sean.j.christopherson@...el.com> writes:

> On Thu, 2018-08-02 at 12:01 +0200, Vitaly Kuznetsov wrote:
>> In preparation to MMU reconfiguration avoidance we need a space to
>> cache source data. As this partially intersects with kvm_mmu_page_role,
>> create 64bit sized union kvm_mmu_role holding both base_role and
>> extended data. No functional change.
>> 
>> Signed-off-by: Vitaly Kuznetsov <vkuznets@...hat.com>
>> ---
>>  arch/x86/include/asm/kvm_host.h | 14 +++++++++++++-
>>  arch/x86/kvm/mmu.c              | 19 ++++++++++++-------
>>  arch/x86/kvm/vmx.c              |  2 +-
>>  3 files changed, 26 insertions(+), 9 deletions(-)
>> 
>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
>> index c5f116f9783d..830166ab4d59 100644
>> --- a/arch/x86/include/asm/kvm_host.h
>> +++ b/arch/x86/include/asm/kvm_host.h
>> @@ -272,6 +272,18 @@ union kvm_mmu_page_role {
>>  	};
>>  };
>>  
>> +union kvm_mmu_scache {
>> +	unsigned int word;
>> +};
>> +
>> +union kvm_mmu_role {
>> +	unsigned long as_u64;
>
> No clue if it matters, but can't this be a u32 since both
> kvm_mmu_page_role and kvm_mmu_scache are capped at 16 bits?

kvm_mmu_page_role is 32 bits, even if we make 'smm' a 1-bit field and
throw away its padding we'll end up with 19 bits ...

(Generally speaking, I would prefer to leave some space for future
extension: we only have 3 'struct kvm_mmu' per vcpu so we're talking
about 12 bytes. We can, of course, change types here in future - this is
not a stable ABI or anything - but the one doing that will have to check
all users and this is always error-prone.)

>
> Tangentially related, it seems like we should have build-
> time asserts on the size of the unions since we're (ab)using
> union behavior to dereference the entire value in a single
> shot, e.g. base_role.word and mmu_role->as_u64.

(I was told that union aliasing is UB; however, it's all over the kernel,
so it's unlikely that gcc's behavior is ever going to change).

I like your idea, I'll add a couple of BUILD_BUG_ONs in v2. Thanks!

>
>> +	struct {
>> +		union kvm_mmu_page_role base_role;
>> +		union kvm_mmu_scache scache;
>> +	};
>> +};
>> +
>>  struct kvm_rmap_head {
>>  	unsigned long val;
>>  };
>> @@ -359,7 +371,7 @@ struct kvm_mmu {
>>  	void (*update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
>>  			   u64 *spte, const void *pte);
>>  	hpa_t root_hpa;
>> -	union kvm_mmu_page_role base_role;
>> +	union kvm_mmu_role mmu_role;
>>  	u8 root_level;
>>  	u8 shadow_root_level;
>>  	u8 ept_ad;
>> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
>> index 85ec027299d6..c538e47e471b 100644
>> --- a/arch/x86/kvm/mmu.c
>> +++ b/arch/x86/kvm/mmu.c
>> @@ -2331,7 +2331,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
>>  	int collisions = 0;
>>  	LIST_HEAD(invalid_list);
>>  
>> -	role = vcpu->arch.mmu->base_role;
>> +	role = vcpu->arch.mmu->mmu_role.base_role;
>>  	role.level = level;
>>  	role.direct = direct;
>>  	if (role.direct)
>> @@ -4377,7 +4377,8 @@ static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu,
>>  void
>>  reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context)
>>  {
>> -	bool uses_nx = context->nx || context->base_role.smep_andnot_wp;
>> +	bool uses_nx = context->nx ||
>> +		context->mmu_role.base_role.smep_andnot_wp;
>>  	struct rsvd_bits_validate *shadow_zero_check;
>>  	int i;
>>  
>> @@ -4696,7 +4697,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
>>  {
>>  	struct kvm_mmu *context = vcpu->arch.mmu;
>>  
>> -	context->base_role.word = mmu_base_role_mask.word &
>> +	context->mmu_role.base_role.word = mmu_base_role_mask.word &
>>  				  kvm_calc_tdp_mmu_root_page_role(vcpu).word;
>>  	context->page_fault = tdp_page_fault;
>>  	context->sync_page = nonpaging_sync_page;
>> @@ -4777,7 +4778,7 @@ void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu)
>>  	else
>>  		paging32_init_context(vcpu, context);
>>  
>> -	context->base_role.word = mmu_base_role_mask.word &
>> +	context->mmu_role.base_role.word = mmu_base_role_mask.word &
>>  				  kvm_calc_shadow_mmu_root_page_role(vcpu).word;
>>  	reset_shadow_zero_bits_mask(vcpu, context);
>>  }
>> @@ -4786,7 +4787,7 @@ EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu);
>>  static union kvm_mmu_page_role
>>  kvm_calc_shadow_ept_root_page_role(struct kvm_vcpu *vcpu, bool accessed_dirty)
>>  {
>> -	union kvm_mmu_page_role role = vcpu->arch.mmu->base_role;
>> +	union kvm_mmu_page_role role = vcpu->arch.mmu->mmu_role.base_role;
>>  
>>  	role.level = PT64_ROOT_4LEVEL;
>>  	role.direct = false;
>> @@ -4816,7 +4817,8 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
>>  	context->update_pte = ept_update_pte;
>>  	context->root_level = PT64_ROOT_4LEVEL;
>>  	context->direct_map = false;
>> -	context->base_role.word = root_page_role.word & mmu_base_role_mask.word;
>> +	context->mmu_role.base_role.word =
>> +		root_page_role.word & mmu_base_role_mask.word;
>>  	context->get_pdptr = kvm_pdptr_read;
>>  
>>  	update_permission_bitmask(vcpu, context, true);
>> @@ -5131,10 +5133,13 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
>>  
>>  		local_flush = true;
>>  		while (npte--) {
>> +			unsigned int base_role =
>> +				vcpu->arch.mmu->mmu_role.base_role.word;
>> +
>>  			entry = *spte;
>>  			mmu_page_zap_pte(vcpu->kvm, sp, spte);
>>  			if (gentry &&
>> -			      !((sp->role.word ^ vcpu->arch.mmu->base_role.word)
>> +			      !((sp->role.word ^ base_role)
>>  			      & mmu_base_role_mask.word) && rmap_can_add(vcpu))
>>  				mmu_pte_write_new_pte(vcpu, sp, spte, &gentry);
>>  			if (need_remote_flush(entry, *spte))
>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>> index 494148818b8d..0d41116bef1f 100644
>> --- a/arch/x86/kvm/vmx.c
>> +++ b/arch/x86/kvm/vmx.c
>> @@ -9028,7 +9028,7 @@ static int nested_vmx_eptp_switching(struct kvm_vcpu *vcpu,
>>  
>>  		kvm_mmu_unload(vcpu);
>>  		mmu->ept_ad = accessed_dirty;
>> -		mmu->base_role.ad_disabled = !accessed_dirty;
>> +		mmu->mmu_role.base_role.ad_disabled = !accessed_dirty;
>>  		vmcs12->ept_pointer = address;
>>  		/*
>>  		 * TODO: Check what's the correct approach in case

-- 
  Vitaly