Message-ID: <383cf8d1-1d6f-f0d3-08de-fe4dc3ce1778@linux.intel.com>
Date: Wed, 6 Sep 2023 09:48:35 +0800
From: Binbin Wu <binbin.wu@...ux.intel.com>
To: isaku.yamahata@...el.com, Xiaoyao Li <xiaoyao.li@...el.com>
Cc: kvm@...r.kernel.org, linux-kernel@...r.kernel.org,
isaku.yamahata@...il.com, Paolo Bonzini <pbonzini@...hat.com>,
erdemaktas@...gle.com, Sean Christopherson <seanjc@...gle.com>,
Sagi Shahar <sagis@...gle.com>,
David Matlack <dmatlack@...gle.com>,
Kai Huang <kai.huang@...el.com>,
Zhi Wang <zhi.wang.linux@...il.com>, chen.bo@...el.com,
hang.yuan@...el.com, tina.zhang@...el.com
Subject: Re: [RFC PATCH v4 05/16] KVM: TDX: Pass size to reclaim_page()
On 7/26/2023 6:23 AM, isaku.yamahata@...el.com wrote:
> From: Xiaoyao Li <xiaoyao.li@...el.com>
>
> A 2MB large page can be tdh_mem_page_aug()'ed to TD directly. In this case,
> it needs to reclaim and clear the page as 2MB size.
>
> Signed-off-by: Xiaoyao Li <xiaoyao.li@...el.com>
> Signed-off-by: Isaku Yamahata <isaku.yamahata@...el.com>
> ---
> arch/x86/kvm/vmx/tdx.c | 24 ++++++++++++++----------
> 1 file changed, 14 insertions(+), 10 deletions(-)
>
> diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
> index 3522ee232eda..86cfbf435671 100644
> --- a/arch/x86/kvm/vmx/tdx.c
> +++ b/arch/x86/kvm/vmx/tdx.c
> @@ -198,12 +198,13 @@ static void tdx_disassociate_vp_on_cpu(struct kvm_vcpu *vcpu)
> smp_call_function_single(cpu, tdx_disassociate_vp_arg, vcpu, 1);
> }
>
> -static void tdx_clear_page(unsigned long page_pa)
> +static void tdx_clear_page(unsigned long page_pa, int size)
> {
> const void *zero_page = (const void *) __va(page_to_phys(ZERO_PAGE(0)));
> void *page = __va(page_pa);
> unsigned long i;
>
> + WARN_ON_ONCE(size % PAGE_SIZE);
> /*
> * When re-assign one page from old keyid to a new keyid, MOVDIR64B is
> * required to clear/write the page with new keyid to prevent integrity
> @@ -212,7 +213,7 @@ static void tdx_clear_page(unsigned long page_pa)
> * clflush doesn't flush cache with HKID set. The cache line could be
> * poisoned (even without MKTME-i), clear the poison bit.
> */
> - for (i = 0; i < PAGE_SIZE; i += 64)
> + for (i = 0; i < size; i += 64)
> movdir64b(page + i, zero_page);
> /*
> * MOVDIR64B store uses WC buffer. Prevent following memory reads
> @@ -221,7 +222,8 @@ static void tdx_clear_page(unsigned long page_pa)
> __mb();
> }
>
> -static int tdx_reclaim_page(hpa_t pa, bool do_wb, u16 hkid)
> +static int tdx_reclaim_page(hpa_t pa, enum pg_level level,
> + bool do_wb, u16 hkid)
> {
> struct tdx_module_output out;
> u64 err;
> @@ -239,8 +241,10 @@ static int tdx_reclaim_page(hpa_t pa, bool do_wb, u16 hkid)
> pr_tdx_error(TDH_PHYMEM_PAGE_RECLAIM, err, &out);
> return -EIO;
> }
> + /* out.r8 == tdx sept page level */
> + WARN_ON_ONCE(out.r8 != pg_level_to_tdx_sept_level(level));
>
> - if (do_wb) {
> + if (do_wb && level == PG_LEVEL_4K) {
I was wondering whether it would be better to add a WARN_ON_ONCE() here to
ensure level is PG_LEVEL_4K, instead of silently skipping the write-back.
But on a second look, the WARN_ON_ONCE() above that compares out.r8 against
the passed-in level already guarantees a warning if do_wb and level are
mismatched, so an extra check isn't strictly needed.
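For reference, the explicit check I originally had in mind would have looked
roughly like the following (an untested sketch on top of this patch, just to
illustrate the idea, not a request to change it):

	/*
	 * Hypothetical alternative: warn instead of silently skipping the
	 * write-back when a non-4K page is passed together with do_wb.
	 */
	WARN_ON_ONCE(do_wb && level != PG_LEVEL_4K);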
> /*
> * Only TDR page gets into this path. No contention is expected
> * because of the last page of TD.
> @@ -252,7 +256,7 @@ static int tdx_reclaim_page(hpa_t pa, bool do_wb, u16 hkid)
> }
> }
>
> - tdx_clear_page(pa);
> + tdx_clear_page(pa, KVM_HPAGE_SIZE(level));
> return 0;
> }
>
> @@ -266,7 +270,7 @@ static void tdx_reclaim_td_page(unsigned long td_page_pa)
> * was already flushed by TDH.PHYMEM.CACHE.WB before here, So
> * cache doesn't need to be flushed again.
> */
> - if (tdx_reclaim_page(td_page_pa, false, 0))
> + if (tdx_reclaim_page(td_page_pa, PG_LEVEL_4K, false, 0))
> /*
> * Leak the page on failure:
> * tdx_reclaim_page() returns an error if and only if there's an
> @@ -474,7 +478,7 @@ void tdx_vm_free(struct kvm *kvm)
> * while operating on TD (Especially reclaiming TDCS). Cache flush with
> * TDX global HKID is needed.
> */
> - if (tdx_reclaim_page(kvm_tdx->tdr_pa, true, tdx_global_keyid))
> + if (tdx_reclaim_page(kvm_tdx->tdr_pa, PG_LEVEL_4K, true, tdx_global_keyid))
> return;
>
> free_page((unsigned long)__va(kvm_tdx->tdr_pa));
> @@ -1468,7 +1472,7 @@ static int tdx_sept_drop_private_spte(struct kvm *kvm, gfn_t gfn,
> * The HKID assigned to this TD was already freed and cache
> * was already flushed. We don't have to flush again.
> */
> - err = tdx_reclaim_page(hpa, false, 0);
> + err = tdx_reclaim_page(hpa, level, false, 0);
> if (KVM_BUG_ON(err, kvm))
> return -EIO;
> tdx_unpin(kvm, pfn);
> @@ -1501,7 +1505,7 @@ static int tdx_sept_drop_private_spte(struct kvm *kvm, gfn_t gfn,
> pr_tdx_error(TDH_PHYMEM_PAGE_WBINVD, err, NULL);
> return -EIO;
> }
> - tdx_clear_page(hpa);
> + tdx_clear_page(hpa, PAGE_SIZE);
> tdx_unpin(kvm, pfn);
> return 0;
> }
> @@ -1612,7 +1616,7 @@ static int tdx_sept_free_private_spt(struct kvm *kvm, gfn_t gfn,
> * already flushed. We don't have to flush again.
> */
> if (!is_hkid_assigned(kvm_tdx))
> - return tdx_reclaim_page(__pa(private_spt), false, 0);
> + return tdx_reclaim_page(__pa(private_spt), PG_LEVEL_4K, false, 0);
>
> /*
> * free_private_spt() is (obviously) called when a shadow page is being