[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <482f1c96-ce6a-8b77-ce18-ec5f19cde800@gmail.com>
Date: Fri, 12 May 2017 11:59:24 +0800
From: Xiao Guangrong <guangrong.xiao@...il.com>
To: Paolo Bonzini <pbonzini@...hat.com>, linux-kernel@...r.kernel.org,
kvm@...r.kernel.org
Cc: Peter Feiner <pfeiner@...gle.com>,
David Matlack <dmatlack@...gle.com>,
Radim Krčmář <rkrcmar@...hat.com>,
Xiao Guangrong <xiaoguangrong@...cent.com>,
Wanpeng Li <wanpeng.li@...mail.com>
Subject: Re: [PATCH 1/2] KVM: nVMX: fix EPT permissions as reported in exit
qualification
On 05/11/2017 07:23 PM, Paolo Bonzini wrote:
> This fixes the new ept_access_test_read_only and ept_access_test_read_write
> testcases from vmx.flat.
>
> The problem is that gpte_access moves bits around to switch from EPT
> bit order (XWR) to ACC_*_MASK bit order (RWX). This results in an
> incorrect exit qualification. To fix this, make pt_access and
> pte_access operate on raw PTE values (only with NX flipped to mean
> "can execute") and call gpte_access at the end of the walk. This
> lets us use pte_access to compute the exit qualification with XWR
> bit order.
>
> Signed-off-by: Paolo Bonzini <pbonzini@...hat.com>
> ---
> arch/x86/kvm/paging_tmpl.h | 35 +++++++++++++++++++++--------------
> 1 file changed, 21 insertions(+), 14 deletions(-)
>
> diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
> index 56241746abbd..b0454c7e4cff 100644
> --- a/arch/x86/kvm/paging_tmpl.h
> +++ b/arch/x86/kvm/paging_tmpl.h
> @@ -283,11 +283,13 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
> pt_element_t pte;
> pt_element_t __user *uninitialized_var(ptep_user);
> gfn_t table_gfn;
> - unsigned index, pt_access, pte_access, accessed_dirty, pte_pkey;
> + u64 pt_access, pte_access;
> + unsigned index, accessed_dirty, pte_pkey;
> unsigned nested_access;
> gpa_t pte_gpa;
> bool have_ad;
> int offset;
> + u64 walk_nx_mask = 0;
> const int write_fault = access & PFERR_WRITE_MASK;
> const int user_fault = access & PFERR_USER_MASK;
> const int fetch_fault = access & PFERR_FETCH_MASK;
> @@ -302,6 +304,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
> have_ad = PT_HAVE_ACCESSED_DIRTY(mmu);
>
> #if PTTYPE == 64
> + walk_nx_mask = 1ULL << PT64_NX_SHIFT;
We can always make walk_nx_mask = 1ULL << PT64_NX_SHIFT, as:
- for EPT, this bit is useless
- for 32bit, bit 63 is always ZERO, so that the final result should be ZERO too,
> if (walker->level == PT32E_ROOT_LEVEL) {
> pte = mmu->get_pdptr(vcpu, (addr >> 30) & 3);
> trace_kvm_mmu_paging_element(pte, walker->level);
> @@ -313,8 +316,6 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
> walker->max_level = walker->level;
> ASSERT(!(is_long_mode(vcpu) && !is_pae(vcpu)));
>
> - accessed_dirty = have_ad ? PT_GUEST_ACCESSED_MASK : 0;
> -
> /*
> * FIXME: on Intel processors, loads of the PDPTE registers for PAE paging
> * by the MOV to CR instruction are treated as reads and do not cause the
> @@ -322,14 +323,14 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
> */
> nested_access = (have_ad ? PFERR_WRITE_MASK : 0) | PFERR_USER_MASK;
>
> - pt_access = pte_access = ACC_ALL;
> + pte_access = ~0;
> ++walker->level;
>
> do {
> gfn_t real_gfn;
> unsigned long host_addr;
>
> - pt_access &= pte_access;
> + pt_access = pte_access;
> --walker->level;
>
> index = PT_INDEX(addr, walker->level);
> @@ -371,6 +372,12 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
>
> trace_kvm_mmu_paging_element(pte, walker->level);
>
> + /*
> + * Inverting the NX it lets us AND it like other
> + * permission bits.
> + */
> + pte_access = pt_access & (pte ^ walk_nx_mask);
> +
> if (unlikely(!FNAME(is_present_gpte)(pte)))
> goto error;
>
> @@ -379,14 +386,16 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
> goto error;
> }
>
> - accessed_dirty &= pte;
> - pte_access = pt_access & FNAME(gpte_access)(vcpu, pte);
> -
> walker->ptes[walker->level - 1] = pte;
> } while (!is_last_gpte(mmu, walker->level, pte));
>
> pte_pkey = FNAME(gpte_pkeys)(vcpu, pte);
> - errcode = permission_fault(vcpu, mmu, pte_access, pte_pkey, access);
> + accessed_dirty = have_ad ? pte_access & PT_GUEST_ACCESSED_MASK : 0;
> +
> + /* Convert to ACC_*_MASK flags for struct guest_walker. */
> + walker->pt_access = FNAME(gpte_access)(vcpu, pt_access ^ walk_nx_mask);
> + walker->pte_access = FNAME(gpte_access)(vcpu, pte_access ^ walk_nx_mask);
> + errcode = permission_fault(vcpu, mmu, walker->pte_access, pte_pkey, access);
> if (unlikely(errcode))
> goto error;
>
> @@ -403,7 +412,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
> walker->gfn = real_gpa >> PAGE_SHIFT;
>
> if (!write_fault)
> - FNAME(protect_clean_gpte)(mmu, &pte_access, pte);
> + FNAME(protect_clean_gpte)(mmu, &walker->pte_access, pte);
> else
> /*
> * On a write fault, fold the dirty bit into accessed_dirty.
> @@ -421,10 +430,8 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
> goto retry_walk;
> }
>
> - walker->pt_access = pt_access;
> - walker->pte_access = pte_access;
> pgprintk("%s: pte %llx pte_access %x pt_access %x\n",
> - __func__, (u64)pte, pte_access, pt_access);
> + __func__, (u64)pte, walker->pte_access, walker->pt_access);
> return 1;
>
> error:
> @@ -452,7 +459,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
> */
> if (!(errcode & PFERR_RSVD_MASK)) {
> vcpu->arch.exit_qualification &= 0x187;
> - vcpu->arch.exit_qualification |= ((pt_access & pte) & 0x7) << 3;
^ here, the original code
is buggy as pt_access and pte have different bit order, fortunately, this patch fixes it
too. :)
Otherwise it looks good to me, thanks for your fix.
Reviewed-by: Xiao Guangrong <xiaoguangrong@...cent.com>
Powered by blists - more mailing lists