lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20230628122342.zdnqsgnugalqj6ix@box.shutemov.name>
Date:   Wed, 28 Jun 2023 15:23:42 +0300
From:   kirill.shutemov@...ux.intel.com
To:     Kai Huang <kai.huang@...el.com>
Cc:     linux-kernel@...r.kernel.org, kvm@...r.kernel.org,
        linux-mm@...ck.org, x86@...nel.org, dave.hansen@...el.com,
        tony.luck@...el.com, peterz@...radead.org, tglx@...utronix.de,
        bp@...en8.de, mingo@...hat.com, hpa@...or.com, seanjc@...gle.com,
        pbonzini@...hat.com, david@...hat.com, dan.j.williams@...el.com,
        rafael.j.wysocki@...el.com, ashok.raj@...el.com,
        reinette.chatre@...el.com, len.brown@...el.com, ak@...ux.intel.com,
        isaku.yamahata@...el.com, ying.huang@...el.com, chao.gao@...el.com,
        sathyanarayanan.kuppuswamy@...ux.intel.com, nik.borisov@...e.com,
        bagasdotme@...il.com, sagis@...gle.com, imammedo@...hat.com
Subject: Re: [PATCH v12 18/22] x86/virt/tdx: Keep TDMRs when module
 initialization is successful

On Tue, Jun 27, 2023 at 02:12:48AM +1200, Kai Huang wrote:
> On the platforms with the "partial write machine check" erratum, the
> kexec() needs to convert all TDX private pages back to normal before
> booting to the new kernel.  Otherwise, the new kernel may get unexpected
> machine check.
> 
> There's no existing infrastructure to track TDX private pages.  Change
> to keep TDMRs when module initialization is successful so that they can
> be used to find PAMTs.
> 
> With this change, only put_online_mems() and freeing the buffer of the
> TDSYSINFO_STRUCT and CMR array still need to be done even when module
> initialization is successful.  Adjust the error handling to explicitly
> do them when module initialization is successful and unconditionally
> clean up the rest when initialization fails.
> 
> Signed-off-by: Kai Huang <kai.huang@...el.com>
> ---
> 
> v11 -> v12 (new patch):
>   - Defer keeping TDMRs logic to this patch for better review
>   - Improved error handling logic (Nikolay/Kirill in patch 15)
> 
> ---
>  arch/x86/virt/vmx/tdx/tdx.c | 84 ++++++++++++++++++-------------------
>  1 file changed, 42 insertions(+), 42 deletions(-)
> 
> diff --git a/arch/x86/virt/vmx/tdx/tdx.c b/arch/x86/virt/vmx/tdx/tdx.c
> index 52b7267ea226..85b24b2e9417 100644
> --- a/arch/x86/virt/vmx/tdx/tdx.c
> +++ b/arch/x86/virt/vmx/tdx/tdx.c
> @@ -49,6 +49,8 @@ static DEFINE_MUTEX(tdx_module_lock);
>  /* All TDX-usable memory regions.  Protected by mem_hotplug_lock. */
>  static LIST_HEAD(tdx_memlist);
>  
> +static struct tdmr_info_list tdx_tdmr_list;
> +
>  /*
>   * Wrapper of __seamcall() to convert SEAMCALL leaf function error code
>   * to kernel error code.  @seamcall_ret and @out contain the SEAMCALL
> @@ -1047,7 +1049,6 @@ static int init_tdmrs(struct tdmr_info_list *tdmr_list)
>  static int init_tdx_module(void)
>  {
>  	struct tdsysinfo_struct *sysinfo;
> -	struct tdmr_info_list tdmr_list;
>  	struct cmr_info *cmr_array;
>  	int ret;
>  
> @@ -1088,17 +1089,17 @@ static int init_tdx_module(void)
>  		goto out_put_tdxmem;
>  
>  	/* Allocate enough space for constructing TDMRs */
> -	ret = alloc_tdmr_list(&tdmr_list, sysinfo);
> +	ret = alloc_tdmr_list(&tdx_tdmr_list, sysinfo);
>  	if (ret)
>  		goto out_free_tdxmem;
>  
>  	/* Cover all TDX-usable memory regions in TDMRs */
> -	ret = construct_tdmrs(&tdx_memlist, &tdmr_list, sysinfo);
> +	ret = construct_tdmrs(&tdx_memlist, &tdx_tdmr_list, sysinfo);
>  	if (ret)
>  		goto out_free_tdmrs;
>  
>  	/* Pass the TDMRs and the global KeyID to the TDX module */
> -	ret = config_tdx_module(&tdmr_list, tdx_global_keyid);
> +	ret = config_tdx_module(&tdx_tdmr_list, tdx_global_keyid);
>  	if (ret)
>  		goto out_free_pamts;
>  
> @@ -1118,51 +1119,50 @@ static int init_tdx_module(void)
>  		goto out_reset_pamts;
>  
>  	/* Initialize TDMRs to complete the TDX module initialization */
> -	ret = init_tdmrs(&tdmr_list);
> +	ret = init_tdmrs(&tdx_tdmr_list);
> +	if (ret)
> +		goto out_reset_pamts;
> +
> +	pr_info("%lu KBs allocated for PAMT.\n",
> +			tdmrs_count_pamt_kb(&tdx_tdmr_list));
> +
> +	/*
> +	 * @tdx_memlist is written here and read at memory hotplug time.
> +	 * Lock out memory hotplug code while building it.
> +	 */
> +	put_online_mems();
> +	/*
> +	 * For now both @sysinfo and @cmr_array are only used during
> +	 * module initialization, so always free them.
> +	 */
> +	free_page((unsigned long)sysinfo);
> +
> +	return 0;
>  out_reset_pamts:
> -	if (ret) {
> -		/*
> -		 * Part of PAMTs may already have been initialized by the
> -		 * TDX module.  Flush cache before returning PAMTs back
> -		 * to the kernel.
> -		 */
> -		wbinvd_on_all_cpus();
> -		/*
> -		 * According to the TDX hardware spec, if the platform
> -		 * doesn't have the "partial write machine check"
> -		 * erratum, any kernel read/write will never cause #MC
> -		 * in kernel space, thus it's OK to not convert PAMTs
> -		 * back to normal.  But do the conversion anyway here
> -		 * as suggested by the TDX spec.
> -		 */
> -		tdmrs_reset_pamt_all(&tdmr_list);
> -	}
> +	/*
> +	 * Part of PAMTs may already have been initialized by the
> +	 * TDX module.  Flush cache before returning PAMTs back
> +	 * to the kernel.
> +	 */
> +	wbinvd_on_all_cpus();
> +	/*
> +	 * According to the TDX hardware spec, if the platform
> +	 * doesn't have the "partial write machine check"
> +	 * erratum, any kernel read/write will never cause #MC
> +	 * in kernel space, thus it's OK to not convert PAMTs
> +	 * back to normal.  But do the conversion anyway here
> +	 * as suggested by the TDX spec.
> +	 */
> +	tdmrs_reset_pamt_all(&tdx_tdmr_list);
>  out_free_pamts:
> -	if (ret)
> -		tdmrs_free_pamt_all(&tdmr_list);
> -	else
> -		pr_info("%lu KBs allocated for PAMT.\n",
> -				tdmrs_count_pamt_kb(&tdmr_list));
> +	tdmrs_free_pamt_all(&tdx_tdmr_list);
>  out_free_tdmrs:
> -	/*
> -	 * Always free the buffer of TDMRs as they are only used during
> -	 * module initialization.
> -	 */
> -	free_tdmr_list(&tdmr_list);
> +	free_tdmr_list(&tdx_tdmr_list);
>  out_free_tdxmem:
> -	if (ret)
> -		free_tdx_memlist(&tdx_memlist);
> +	free_tdx_memlist(&tdx_memlist);
>  out_put_tdxmem:
> -	/*
> -	 * @tdx_memlist is written here and read at memory hotplug time.
> -	 * Lock out memory hotplug code while building it.
> -	 */
>  	put_online_mems();
>  out:
> -	/*
> -	 * For now both @sysinfo and @cmr_array are only used during
> -	 * module initialization, so always free them.
> -	 */
>  	free_page((unsigned long)sysinfo);
>  	return ret;
>  }

This diff is extremely hard to follow, but I think the change to error
handling Nikolay proposed has to be applied to the function from the
beginning, not changed drastically in this patch.

-- 
  Kiryl Shutsemau / Kirill A. Shutemov

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ