lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <ffc9e29aa6b9175bde23a522409a731d5de5f169.camel@intel.com>
Date: Mon, 13 Oct 2025 19:31:24 +0000
From: "Edgecombe, Rick P" <rick.p.edgecombe@...el.com>
To: "seanjc@...gle.com" <seanjc@...gle.com>, "x86@...nel.org"
	<x86@...nel.org>, "kas@...nel.org" <kas@...nel.org>,
	"dave.hansen@...ux.intel.com" <dave.hansen@...ux.intel.com>,
	"mingo@...hat.com" <mingo@...hat.com>, "tglx@...utronix.de"
	<tglx@...utronix.de>, "bp@...en8.de" <bp@...en8.de>, "pbonzini@...hat.com"
	<pbonzini@...hat.com>
CC: "Gao, Chao" <chao.gao@...el.com>, "Huang, Kai" <kai.huang@...el.com>,
	"linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>, "Williams, Dan
 J" <dan.j.williams@...el.com>, "Hunter, Adrian" <adrian.hunter@...el.com>,
	"kvm@...r.kernel.org" <kvm@...r.kernel.org>, "linux-coco@...ts.linux.dev"
	<linux-coco@...ts.linux.dev>, "xin@...or.com" <xin@...or.com>
Subject: Re: [RFC PATCH 3/4] KVM: x86/tdx: Do VMXON and TDX-Module
 initialization during tdx_init()

On Fri, 2025-10-10 at 15:04 -0700, Sean Christopherson wrote:
> TODO: Split this up, write changelogs.
> 
> Cc: Chao Gao <chao.gao@...el.com>
> Cc: Dan Williams <dan.j.williams@...el.com>
> Cc: Xin Li (Intel) <xin@...or.com>
> Cc: Kai Huang <kai.huang@...el.com>
> Cc: Adrian Hunter <adrian.hunter@...el.com>
> Signed-off-by: Sean Christopherson <seanjc@...gle.com>
> ---
> 
[snip]
> -
>  static int __init __tdx_bringup(void)
>  {
>  	const struct tdx_sys_info_td_conf *td_conf;
> @@ -3462,34 +3407,18 @@ static int __init __tdx_bringup(void)
>  		}
>  	}
>  
> -	/*
> -	 * Enabling TDX requires enabling hardware virtualization first,
> -	 * as making SEAMCALLs requires CPU being in post-VMXON state.
> -	 */
> -	r = kvm_enable_virtualization();
> -	if (r)
> -		return r;
> -
> -	cpus_read_lock();
> -	r = __do_tdx_bringup();
> -	cpus_read_unlock();
> -
> -	if (r)
> -		goto tdx_bringup_err;
> -
> -	r = -EINVAL;
>  	/* Get TDX global information for later use */
>  	tdx_sysinfo = tdx_get_sysinfo();
> -	if (WARN_ON_ONCE(!tdx_sysinfo))
> -		goto get_sysinfo_err;
> +	if (!tdx_sysinfo)
> +		return -EINVAL;
>  
>  	/* Check TDX module and KVM capabilities */
>  	if (!tdx_get_supported_attrs(&tdx_sysinfo->td_conf) ||
>  	    !tdx_get_supported_xfam(&tdx_sysinfo->td_conf))
> -		goto get_sysinfo_err;
> +		return -EINVAL;
>  
>  	if (!(tdx_sysinfo->features.tdx_features0 & MD_FIELD_ID_FEATURES0_TOPOLOGY_ENUM))
> -		goto get_sysinfo_err;
> +		return -EINVAL;
>  
>  	/*
>  	 * TDX has its own limit of maximum vCPUs it can support for all
> @@ -3524,34 +3453,31 @@ static int __init __tdx_bringup(void)
>  	if (td_conf->max_vcpus_per_td < num_present_cpus()) {
>  		pr_err("Disable TDX: MAX_VCPU_PER_TD (%u) smaller than number of logical CPUs (%u).\n",
>  				td_conf->max_vcpus_per_td, num_present_cpus());
> -		goto get_sysinfo_err;
> +		return -EINVAL;
>  	}
>  
>  	if (misc_cg_set_capacity(MISC_CG_RES_TDX, tdx_get_nr_guest_keyids()))
> -		goto get_sysinfo_err;
> +		return -EINVAL;
>  
>  	/*
> -	 * Leave hardware virtualization enabled after TDX is enabled
> -	 * successfully.  TDX CPU hotplug depends on this.
> +	 * TDX-specific cpuhp callback to disallow offlining the last CPU in a
> +	 * packing while KVM is running one or more TDs.  Reclaiming HKIDs
> +	 * requires doing PAGE.WBINVD on every package, i.e. offlining all CPUs
> +	 * of a package would prevent reclaiming the HKID.
>  	 */
> +	r = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "kvm/cpu/tdx:online",
> +			      tdx_online_cpu, tdx_offline_cpu);

Could pass NULL instead of tdx_online_cpu() and delete this version of
tdx_online_cpu(). Also could remove the error handling too.

Also, can we name the two tdx_offline_cpu()'s differently? This one is all about
keyid's being in use. tdx_hkid_offline_cpu()?

> +	if (r < 0)
> +		goto err_cpuhup;
> +
> +	tdx_cpuhp_state = r;
>  	return 0;
>  
> -get_sysinfo_err:
> -	__tdx_cleanup();
> -tdx_bringup_err:
> -	kvm_disable_virtualization();
> +err_cpuhup:
> +	misc_cg_set_capacity(MISC_CG_RES_TDX, 0);
>  	return r;
>  }
>  
> -void tdx_cleanup(void)
> -{
> -	if (enable_tdx) {
> -		misc_cg_set_capacity(MISC_CG_RES_TDX, 0);
> -		__tdx_cleanup();
> -		kvm_disable_virtualization();
> -	}
> -}
> -
>  int __init tdx_bringup(void)
>  {
>  	int r, i;
> @@ -3563,6 +3489,16 @@ int __init tdx_bringup(void)
>  	if (!enable_tdx)
>  		return 0;
>  
> +	if (!cpu_feature_enabled(X86_FEATURE_TDX_HOST_PLATFORM)) {
> +		pr_err("TDX not supported by the host platform\n");
> +		goto success_disable_tdx;
> +	}
> +
> +	if (!cpu_feature_enabled(X86_FEATURE_OSXSAVE)) {
> +		pr_err("OSXSAVE is required for TDX\n");
> +		return -EINVAL;

Why change this condition from goto success_disable_tdx?

> +	}
> +
>  	if (!enable_ept) {
>  		pr_err("EPT is required for TDX\n");
>  		goto success_disable_tdx;
> @@ -3578,61 +3514,9 @@ int __init tdx_bringup(void)
>  		goto success_disable_tdx;
>  	}
>  
> -	if (!cpu_feature_enabled(X86_FEATURE_OSXSAVE)) {
> -		pr_err("tdx: OSXSAVE is required for TDX\n");
> -		goto success_disable_tdx;
> -	}
> -
> -	if (!cpu_feature_enabled(X86_FEATURE_MOVDIR64B)) {
> -		pr_err("tdx: MOVDIR64B is required for TDX\n");
> -		goto success_disable_tdx;
> -	}
> -
> -	if (!cpu_feature_enabled(X86_FEATURE_SELFSNOOP)) {
> -		pr_err("Self-snoop is required for TDX\n");
> -		goto success_disable_tdx;
> -	}
> -
> -	if (!cpu_feature_enabled(X86_FEATURE_TDX_HOST_PLATFORM)) {
> -		pr_err("tdx: no TDX private KeyIDs available\n");
> -		goto success_disable_tdx;
> -	}
> -
> -	if (!enable_virt_at_load) {
> -		pr_err("tdx: tdx requires kvm.enable_virt_at_load=1\n");
> -		goto success_disable_tdx;
> -	}
> -
> -	/*
> -	 * Ideally KVM should probe whether TDX module has been loaded
> -	 * first and then try to bring it up.  But TDX needs to use SEAMCALL
> -	 * to probe whether the module is loaded (there is no CPUID or MSR
> -	 * for that), and making SEAMCALL requires enabling virtualization
> -	 * first, just like the rest steps of bringing up TDX module.
> -	 *
> -	 * So, for simplicity do everything in __tdx_bringup(); the first
> -	 * SEAMCALL will return -ENODEV when the module is not loaded.  The
> -	 * only complication is having to make sure that initialization
> -	 * SEAMCALLs don't return TDX_SEAMCALL_VMFAILINVALID in other
> -	 * cases.
> -	 */
>  	r = __tdx_bringup();
> -	if (r) {
> -		/*
> -		 * Disable TDX only but don't fail to load module if the TDX
> -		 * module could not be loaded.  No need to print message saying
> -		 * "module is not loaded" because it was printed when the first
> -		 * SEAMCALL failed.  Don't bother unwinding the S-EPT hooks or
> -		 * vm_size, as kvm_x86_ops have already been finalized (and are
> -		 * intentionally not exported).  The S-EPT code is unreachable,
> -		 * and allocating a few more bytes per VM in a should-be-rare
> -		 * failure scenario is a non-issue.
> -		 */
> -		if (r == -ENODEV)
> -			goto success_disable_tdx;
> -
> +	if (r)
>  		enable_tdx = 0;
> -	}
>  

I think the previous discussion was that there should be a probe and enable
step. We could not fail KVM init if TDX is not supported (probe), but not try to
cleanly handle any other unexpected error (fail enabled).

The existing code mostly has the "probe" type checks in tdx_bringup(), and the
"enable" type checks in __tdx_bringup(). But now the gutted __tdx_bringup() is
very probe-y. Ideally we could separate these into named "install" and "probe"
functions. I don't know if this would be good to do this as part of this series
or later though.

>  	return r;
>  
> 
> 

[snip]

>  
>  /*
>   * Add a memory region as a TDX memory block.  The caller must make sure
>   * all memory regions are added in address ascending order and don't
>   * overlap.
>   */
> -static int add_tdx_memblock(struct list_head *tmb_list, unsigned long start_pfn,
> -			    unsigned long end_pfn, int nid)
> +static __init int add_tdx_memblock(struct list_head *tmb_list,
> +				   unsigned long start_pfn,
> +				   unsigned long end_pfn, int nid)

One easy thing to break this up would be to do this __init adjustments in a
follow on patch.


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ