linux-kernel - Re: [PATCH v4 5/5] KVM: x86: improve reexecute

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20130104224432.GB13481@amt.cnet>
Date:	Fri, 4 Jan 2013 20:44:32 -0200
From:	Marcelo Tosatti <mtosatti@...hat.com>
To:	Xiao Guangrong <xiaoguangrong@...ux.vnet.ibm.com>
Cc:	Gleb Natapov <gleb@...hat.com>,
	LKML <linux-kernel@...r.kernel.org>, KVM <kvm@...r.kernel.org>
Subject: Re: [PATCH v4 5/5] KVM: x86: improve reexecute_instruction

On Fri, Jan 04, 2013 at 09:56:59PM +0800, Xiao Guangrong wrote:
> The current reexecute_instruction can not well detect the failed instruction
> emulation. It allows guest to retry all the instructions except it accesses
> on error pfn
> 
> For example, some cases are nested-write-protect - if the page we want to
> write is used as PDE but it chains to itself. Under this case, we should
> stop the emulation and report the case to userspace
> 
> Signed-off-by: Xiao Guangrong <xiaoguangrong@...ux.vnet.ibm.com>
> ---
>  arch/x86/include/asm/kvm_host.h |    7 +++++
>  arch/x86/kvm/paging_tmpl.h      |   24 +++++++++++++-----
>  arch/x86/kvm/x86.c              |   50 ++++++++++++++++++++++++--------------
>  3 files changed, 55 insertions(+), 26 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index c431b33..de229e6 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -502,6 +502,13 @@ struct kvm_vcpu_arch {
>  		u64 msr_val;
>  		struct gfn_to_hva_cache data;
>  	} pv_eoi;
> +
> +	/*
> +	 * Indicate whether the gfn is used as page table in guest which
> +	 * is set when fix page fault and used to detect unhandeable
> +	 * instruction.
> +	 */
> +	bool target_gfn_is_pt;
>  };
> 
>  struct kvm_lpage_info {
> diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
> index 0453fa0..ca1be75 100644
> --- a/arch/x86/kvm/paging_tmpl.h
> +++ b/arch/x86/kvm/paging_tmpl.h
> @@ -507,20 +507,27 @@ out_gpte_changed:
>   */
>  static bool
>  FNAME(is_self_change_mapping)(struct kvm_vcpu *vcpu,
> -			      struct guest_walker *walker, int user_fault)
> +			      struct guest_walker *walker, int user_fault,
> +			      bool *target_gfn_is_pt)
>  {
>  	int level;
>  	gfn_t mask = ~(KVM_PAGES_PER_HPAGE(walker->level) - 1);
> +	bool self_changed = false;
> +
> +	*target_gfn_is_pt = false;
> 
>  	if (!(walker->pte_access & ACC_WRITE_MASK ||
>  	      (!is_write_protection(vcpu) && !user_fault)))
>  		return false;
> 
> -	for (level = walker->level; level <= walker->max_level; level++)
> -		if (!((walker->gfn ^ walker->table_gfn[level - 1]) & mask))
> -			return true;
> +	for (level = walker->level; level <= walker->max_level; level++) {
> +		gfn_t gfn = walker->gfn ^ walker->table_gfn[level - 1];
> +
> +		self_changed |= !(gfn & mask);
> +		*target_gfn_is_pt |= !gfn;
> +	}
> 
> -	return false;
> +	return self_changed;
>  }
> 
>  /*
> @@ -548,7 +555,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
>  	int level = PT_PAGE_TABLE_LEVEL;
>  	int force_pt_level;
>  	unsigned long mmu_seq;
> -	bool map_writable;
> +	bool map_writable, is_self_change_mapping;
> 
>  	pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);
> 
> @@ -576,9 +583,12 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
>  		return 0;
>  	}
> 
> +	is_self_change_mapping = FNAME(is_self_change_mapping)(vcpu,
> +		      &walker, user_fault, &vcpu->arch.target_gfn_is_pt);
> +
>  	if (walker.level >= PT_DIRECTORY_LEVEL)
>  		force_pt_level = mapping_level_dirty_bitmap(vcpu, walker.gfn)
> -		   || FNAME(is_self_change_mapping)(vcpu, &walker, user_fault);
> +		   || is_self_change_mapping;
>  	else
>  		force_pt_level = 1;
>  	if (!force_pt_level) {
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index b0a3678..44c6992 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -4756,15 +4756,8 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu)
>  static bool reexecute_instruction(struct kvm_vcpu *vcpu, unsigned long cr2)
>  {
>  	gpa_t gpa = cr2;
> +	gfn_t gfn;
>  	pfn_t pfn;
> -	unsigned int indirect_shadow_pages;
> -
> -	spin_lock(&vcpu->kvm->mmu_lock);
> -	indirect_shadow_pages = vcpu->kvm->arch.indirect_shadow_pages;
> -	spin_unlock(&vcpu->kvm->mmu_lock);
> -
> -	if (!indirect_shadow_pages)
> -		return false;

This renders the previous patch obsolete, pretty much (please fold).

>  	if (!vcpu->arch.mmu.direct_map) {
>  		/*
> @@ -4781,13 +4774,7 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, unsigned long cr2)
>  			return true;
>  	}
> 
> -	/*
> -	 * if emulation was due to access to shadowed page table
> -	 * and it failed try to unshadow page and re-enter the
> -	 * guest to let CPU execute the instruction.
> -	 */
> -	if (kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa)))
> -		return true;
> +	gfn = gpa_to_gfn(gpa);
> 
>  	/*
>  	 * Do not retry the unhandleable instruction if it faults on the
> @@ -4795,13 +4782,38 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, unsigned long cr2)
>  	 * retry instruction -> write #PF -> emulation fail -> retry
>  	 * instruction -> ...
>  	 */
> -	pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(gpa));
> -	if (!is_error_noslot_pfn(pfn)) {
> -		kvm_release_pfn_clean(pfn);
> +	pfn = gfn_to_pfn(vcpu->kvm, gfn);
> +
> +	/*
> +	 * If the instruction failed on the error pfn, it can not be fixed,
> +	 * report the error to userspace.
> +	 */
> +	if (is_error_noslot_pfn(pfn))
> +		return false;
> +
> +	kvm_release_pfn_clean(pfn);
> +
> +	/* The instructions are well-emulated on direct mmu. */
> +	if (vcpu->arch.mmu.direct_map) {

!direct_map?

> +		unsigned int indirect_shadow_pages;
> +
> +		spin_lock(&vcpu->kvm->mmu_lock);
> +		indirect_shadow_pages = vcpu->kvm->arch.indirect_shadow_pages;
> +		spin_unlock(&vcpu->kvm->mmu_lock);
> +
> +		if (indirect_shadow_pages)
> +			kvm_mmu_unprotect_page(vcpu->kvm, gfn);
> +
>  		return true;
>  	}
> 
> -	return false;
> +	kvm_mmu_unprotect_page(vcpu->kvm, gfn);
> +
> +	/* If the target gfn is used as page table, the fault can
> +	 * not be avoided by unprotecting shadow page and it will
> +	 * be reported to userspace.
> +	 */
> +	return !vcpu->arch.target_gfn_is_pt;
>  }

The idea was

How about recording the gfn number for shadow pages that have been
shadowed in the current pagefault run? (which is cheap, compared to
shadowing these pages).

If failed instruction emulation is write to one of these gfns, then
fail.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/