lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20fbbfa8-3d92-c497-f577-fbc28899082c@intel.com>
Date:   Fri, 27 May 2022 17:40:35 +0800
From:   "Wang, Lei" <lei4.wang@...el.com>
To:     Sean Christopherson <seanjc@...gle.com>
Cc:     pbonzini@...hat.com, vkuznets@...hat.com, wanpengli@...cent.com,
        jmattson@...gle.com, joro@...tes.org, chenyi.qiang@...el.com,
        kvm@...r.kernel.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH v7 6/8] KVM: MMU: Add support for PKS emulation

On 5/25/2022 7:28 AM, Sean Christopherson wrote:
> On Sun, Apr 24, 2022, Lei Wang wrote:
>> @@ -454,10 +455,11 @@ struct kvm_mmu {
>>   	u8 permissions[16];
>>   
>>   	/*
>> -	* The pkru_mask indicates if protection key checks are needed.  It
>> -	* consists of 16 domains indexed by page fault error code bits [4:1],
>> -	* with PFEC.RSVD replaced by ACC_USER_MASK from the page tables.
>> -	* Each domain has 2 bits which are ANDed with AD and WD from PKRU.
>> +	* The pkr_mask indicates if protection key checks are needed.
>> +	* It consists of 16 domains indexed by page fault error code
>> +	* bits[4:1] with PFEC.RSVD replaced by ACC_USER_MASK from the
>> +	* page tables. Each domain has 2 bits which are ANDed with AD
>> +	* and WD from PKRU/PKRS.
> Same comments, align and wrap closer to 80 please.
Will do it.
>>   	*/
>>   	u32 pkr_mask;
>>   
>> diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
>> index cea03053a153..6963c641e6ce 100644
>> --- a/arch/x86/kvm/mmu.h
>> +++ b/arch/x86/kvm/mmu.h
>> @@ -45,7 +45,8 @@
>>   #define PT32E_ROOT_LEVEL 3
>>   
>>   #define KVM_MMU_CR4_ROLE_BITS (X86_CR4_PSE | X86_CR4_PAE | X86_CR4_LA57 | \
>> -			       X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE)
>> +			       X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE | \
>> +			       X86_CR4_PKS)
>>   
>>   #define KVM_MMU_CR0_ROLE_BITS (X86_CR0_PG | X86_CR0_WP)
>>   #define KVM_MMU_EFER_ROLE_BITS (EFER_LME | EFER_NX)
>> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
>> index 6d3276986102..a6cbc22d3312 100644
>> --- a/arch/x86/kvm/mmu/mmu.c
>> +++ b/arch/x86/kvm/mmu/mmu.c
>> @@ -209,6 +209,7 @@ BUILD_MMU_ROLE_REGS_ACCESSOR(cr4, smep, X86_CR4_SMEP);
>>   BUILD_MMU_ROLE_REGS_ACCESSOR(cr4, smap, X86_CR4_SMAP);
>>   BUILD_MMU_ROLE_REGS_ACCESSOR(cr4, pke, X86_CR4_PKE);
>>   BUILD_MMU_ROLE_REGS_ACCESSOR(cr4, la57, X86_CR4_LA57);
>> +BUILD_MMU_ROLE_REGS_ACCESSOR(cr4, pks, X86_CR4_PKS);
>>   BUILD_MMU_ROLE_REGS_ACCESSOR(efer, nx, EFER_NX);
>>   BUILD_MMU_ROLE_REGS_ACCESSOR(efer, lma, EFER_LMA);
>>   
>> @@ -231,6 +232,7 @@ BUILD_MMU_ROLE_ACCESSOR(ext,  cr4, smep);
>>   BUILD_MMU_ROLE_ACCESSOR(ext,  cr4, smap);
>>   BUILD_MMU_ROLE_ACCESSOR(ext,  cr4, pke);
>>   BUILD_MMU_ROLE_ACCESSOR(ext,  cr4, la57);
>> +BUILD_MMU_ROLE_ACCESSOR(ext,  cr4, pks);
>>   BUILD_MMU_ROLE_ACCESSOR(base, efer, nx);
>>   
>>   static struct kvm_mmu_role_regs vcpu_to_role_regs(struct kvm_vcpu *vcpu)
>> @@ -4608,37 +4610,58 @@ static void update_permission_bitmask(struct kvm_mmu *mmu, bool ept)
>>   }
>>   
>>   /*
> ...
>
>> + * Protection Key Rights (PKR) is an additional mechanism by which data accesses
>> + * with 4-level or 5-level paging (EFER.LMA=1) may be disabled based on the
>> + * Protection Key Rights Userspace (PRKU) or Protection Key Rights Supervisor
>> + * (PKRS) registers.  The Protection Key (PK) used for an access is a 4-bit
>> + * value specified in bits 62:59 of the leaf PTE used to translate the address.
>> + *
>> + * PKRU and PKRS are 32-bit registers, with 16 2-bit entries consisting of an
>> + * access-disable (AD) and write-disable (WD) bit.  The PK from the leaf PTE is
>> + * used to index the approriate PKR (see below), e.g. PK=1 would consume bits
> s/approriate/appropriate
Will correct it.
>> + * 3:2 (bit 3 == write-disable, bit 2 == access-disable).
>> + *
>> + * The PK register (PKRU vs. PKRS) indexed by the PK depends on the type of
>> + * _address_ (not access type!).  For a user-mode address, PKRU is used; for a
>> + * supervisor-mode address, PKRS is used.  An address is supervisor-mode if the
>> + * U/S flag (bit 2) is 0 in at least one of the paging-structure entries, i.e.
>> + * an address is user-mode if the U/S flag is 1 in _all_ entries.  Again, this
>> + * is the address type, not the the access type, e.g. a supervisor-mode _access_
> Double "the the" can be a single "the".
Will remove it.
>> + * will consume PKRU if the _address_ is a user-mode address.
>> + *
>> + * As alluded to above, PKR checks are only performed for data accesses; code
>> + * fetches are not subject to PKR checks.  Terminal page faults (!PRESENT or
>> + * PFEC.RSVD=1) are also not subject to PKR checks.
>> + *
>> + * PKR write-disable checks for superivsor-mode _accesses_ are performed if and
>> + * only if CR0.WP=1 (though access-disable checks still apply).
>> + *
>> + * In summary, PKR checks are based on (a) EFER.LMA, (b) CR4.PKE or CR4.PKS,
>> + * (c) CR0.WP, (d) the PK in the leaf PTE, (e) two bits from the corresponding
>> + * PKR{S,U} entry, (f) the access type (derived from the other PFEC bits), and
>> + * (g) the address type (retrieved from the paging-structure entries).
>> + *
>> + * To avoid conditional branches in permission_fault(), the PKR bitmask caches
>> + * the above inputs, except for (e) the PKR{S,U} entry.  The FETCH, USER, and
>> + * WRITE bits of the PFEC and the effective value of the paging-structures' U/S
>> + * bit (slotted into the PFEC.RSVD position, bit 3) are used to index into the
>> + * PKR bitmask (similar to the 4-bit Protection Key itself).  The two bits of
>> + * the PKR bitmask "entry" are then extracted and ANDed with the two bits of
>> + * the PKR{S,U} register corresponding to the address type and protection key.
>> + *
>> + * E.g. for all values where PFEC.FETCH=1, the corresponding pkr_bitmask bits
>> + * will be 00b, thus masking away the AD and WD bits from the PKR{S,U} register
>> + * to suppress PKR checks on code fetches.
>> + */
>>   static void update_pkr_bitmask(struct kvm_mmu *mmu)
>>   {
>>   	unsigned bit;
>>   	bool wp;
>> -
> Please keep this newline, i.e. after the declaration of the cr4 booleans.  That
> helps isolate the clearing of mmu->pkr_mask, which makes the functional affect of
> the earlier return more obvious.
>
> Ah, and use reverse fir tree for the variable declarations, i.e.
>
> 	bool cr4_pke = is_cr4_pke(mmu);
> 	bool cr4_pks = is_cr4_pks(mmu);
> 	unsigned bit;
> 	bool wp;
>
> 	mmu->pkr_mask = 0;
>
> 	if (!cr4_pke && !cr4_pks)
> 		return;

Very nice of you, will use reverse fir tree for the declaration.

>> +	bool cr4_pke = is_cr4_pke(mmu);
>> +	bool cr4_pks = is_cr4_pks(mmu);
>>   	mmu->pkr_mask = 0;
>>   
>> -	if (!is_cr4_pke(mmu))
>> +	if (!cr4_pke && !cr4_pks)
>>   		return;
>>   
>>   	wp = is_cr0_wp(mmu);
>    
>
>    ...
>
>> @@ -6482,14 +6509,22 @@ u32 kvm_mmu_pkr_bits(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
>>   		     unsigned pte_access, unsigned pte_pkey, unsigned int pfec)
>>   {
>>   	u32 pkr_bits, offset;
>> +	u32 pkr;
>>   
>>   	/*
>> -	* PKRU defines 32 bits, there are 16 domains and 2
>> -	* attribute bits per domain in pkru.  pte_pkey is the
>> -	* index of the protection domain, so pte_pkey * 2 is
>> -	* is the index of the first bit for the domain.
>> +	* PKRU and PKRS both define 32 bits. There are 16 domains
>> +	* and 2 attribute bits per domain in them. pte_key is the
>> +	* index of the protection domain, so pte_pkey * 2 is the
>> +	* index of the first bit for the domain. The use of PKRU
>> +	* versus PKRS is selected by the address type, as determined
>> +	* by the U/S bit in the paging-structure entries.
>
> Align and wrap closer to 80 please.
Will do it.

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ