[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <SA1PR21MB1335046F4BA2B407C9026130BF6F9@SA1PR21MB1335.namprd21.prod.outlook.com>
Date: Tue, 2 May 2023 01:34:18 +0000
From: Dexuan Cui <decui@...rosoft.com>
To: "Michael Kelley (LINUX)" <mikelley@...rosoft.com>,
"ak@...ux.intel.com" <ak@...ux.intel.com>,
"arnd@...db.de" <arnd@...db.de>, "bp@...en8.de" <bp@...en8.de>,
"brijesh.singh@....com" <brijesh.singh@....com>,
"dan.j.williams@...el.com" <dan.j.williams@...el.com>,
"dave.hansen@...ux.intel.com" <dave.hansen@...ux.intel.com>,
Haiyang Zhang <haiyangz@...rosoft.com>,
"hpa@...or.com" <hpa@...or.com>,
"jane.chu@...cle.com" <jane.chu@...cle.com>,
"kirill.shutemov@...ux.intel.com" <kirill.shutemov@...ux.intel.com>,
KY Srinivasan <kys@...rosoft.com>,
"linux-arch@...r.kernel.org" <linux-arch@...r.kernel.org>,
"linux-hyperv@...r.kernel.org" <linux-hyperv@...r.kernel.org>,
"luto@...nel.org" <luto@...nel.org>,
"mingo@...hat.com" <mingo@...hat.com>,
"peterz@...radead.org" <peterz@...radead.org>,
"rostedt@...dmis.org" <rostedt@...dmis.org>,
"sathyanarayanan.kuppuswamy@...ux.intel.com"
<sathyanarayanan.kuppuswamy@...ux.intel.com>,
"seanjc@...gle.com" <seanjc@...gle.com>,
"tglx@...utronix.de" <tglx@...utronix.de>,
"tony.luck@...el.com" <tony.luck@...el.com>,
"wei.liu@...nel.org" <wei.liu@...nel.org>,
"x86@...nel.org" <x86@...nel.org>
CC: "linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
Tianyu Lan <Tianyu.Lan@...rosoft.com>
Subject: RE: [PATCH v5 5/6] Drivers: hv: vmbus: Support TDX guests
> From: Michael Kelley (LINUX) <mikelley@...rosoft.com>
> Sent: Monday, May 1, 2023 10:33 AM
> ...
> From: Dexuan Cui
> >
> > Add Hyper-V specific code so that a TDX guest can run on Hyper-V:
> > No need to use hv_vp_assist_page.
> > Don't use the unsafe Hyper-V TSC page.
> > Don't try to use HV_REGISTER_CRASH_CTL.
> > Don't trust Hyper-V's TLB-flushing hypercalls.
> > Don't use lazy EOI.
> > Share SynIC Event/Message pages and VMBus Monitor pages with the
> > host.
>
> This patch no longer does anything with the VMBus monitor pages.
Sorry, I forgot to update the commit log. Will drop this from the log.
> > Use pgprot_decrypted(PAGE_KERNEL)in hv_ringbuffer_init().
>
> The above line in the commit message is stale and can be dropped.
Will drop this from the commit log.
> > @@ -116,6 +117,7 @@ int hv_synic_alloc(void)
> > {
> > int cpu;
> > struct hv_per_cpu_context *hv_cpu;
> > + int ret = -ENOMEM;
> >
> > /*
> > * First, zero all per-cpu memory areas so hv_synic_free() can
> > @@ -159,6 +161,28 @@ int hv_synic_alloc(void)
> > goto err;
> > }
> > }
> > +
> > + /* It's better to leak the page if the decryption fails. */
> > + if (hv_isolation_type_tdx()) {
> > + ret = set_memory_decrypted(
> > + (unsigned long)hv_cpu->synic_message_page, 1);
> > + if (ret) {
> > + pr_err("Failed to decrypt SYNIC msg page\n");
> > + hv_cpu->synic_message_page = NULL;
> > + goto err;
> > + }
> > +
> > + ret = set_memory_decrypted(
> > + (unsigned long)hv_cpu->synic_event_page, 1);
> > + if (ret) {
> > + pr_err("Failed to decrypt SYNIC event page\n");
> > + hv_cpu->synic_event_page = NULL;
> > + goto err;
> > + }
>
> The error handling still doesn't work quite correctly. In the TDX case, upon
> exiting this function, the synic_message_page and the synic_event_page
> must
> each either be mapped decrypted or be NULL. This requirement is so
> that hv_synic_free() will do the right thing in changing the mapping back to
> encrypted. hv_synic_free() can't handle a non-NULL page being encrypted.
>
> In the above code, if we fail to decrypt the synic_message_page, then setting
> it to NULL will leak the page (which we'll live with) and ensures that
> hv_synic_free()
> will handle it correctly. But at that point we'll exit with synic_event_page
> non-NULL and in the encrypted state, which hv_synic_free() can't handle.
>
> Michael
Thanks for spotting the issue!
I think the below extra changes should do the job:
@@ -121,91 +121,102 @@ int hv_synic_alloc(void)
/*
* First, zero all per-cpu memory areas so hv_synic_free() can
* detect what memory has been allocated and cleanup properly
* after any failures.
*/
for_each_present_cpu(cpu) {
hv_cpu = per_cpu_ptr(hv_context.cpu_context, cpu);
memset(hv_cpu, 0, sizeof(*hv_cpu));
}
hv_context.hv_numa_map = kcalloc(nr_node_ids, sizeof(struct cpumask),
GFP_KERNEL);
if (hv_context.hv_numa_map == NULL) {
pr_err("Unable to allocate NUMA map\n");
goto err;
}
for_each_present_cpu(cpu) {
hv_cpu = per_cpu_ptr(hv_context.cpu_context, cpu);
tasklet_init(&hv_cpu->msg_dpc,
vmbus_on_msg_dpc, (unsigned long) hv_cpu);
/*
* Synic message and event pages are allocated by paravisor.
* Skip these pages allocation here.
*/
if (!hv_isolation_type_snp() && !hv_root_partition) {
hv_cpu->synic_message_page =
(void *)get_zeroed_page(GFP_ATOMIC);
if (hv_cpu->synic_message_page == NULL) {
pr_err("Unable to allocate SYNIC message page\n");
goto err;
}
hv_cpu->synic_event_page =
(void *)get_zeroed_page(GFP_ATOMIC);
if (hv_cpu->synic_event_page == NULL) {
pr_err("Unable to allocate SYNIC event page\n");
+
+ free_page((unsigned long)hv_cpu->synic_message_page);
+ hv_cpu->synic_message_page = NULL;
+
goto err;
}
}
/* It's better to leak the page if the decryption fails. */
if (hv_isolation_type_tdx()) {
ret = set_memory_decrypted(
(unsigned long)hv_cpu->synic_message_page, 1);
if (ret) {
pr_err("Failed to decrypt SYNIC msg page\n");
hv_cpu->synic_message_page = NULL;
+
+ /*
+ * Free the event page so that a TDX VM won't
+ * try to encrypt the page in hv_synic_free().
+ */
+ free_page((unsigned long)hv_cpu->synic_event_page);
+ hv_cpu->synic_event_page = NULL;
goto err;
}
ret = set_memory_decrypted(
(unsigned long)hv_cpu->synic_event_page, 1);
if (ret) {
pr_err("Failed to decrypt SYNIC event page\n");
hv_cpu->synic_event_page = NULL;
goto err;
}
memset(hv_cpu->synic_message_page, 0, PAGE_SIZE);
memset(hv_cpu->synic_event_page, 0, PAGE_SIZE);
}
}
return 0;
err:
/*
* Any memory allocations that succeeded will be freed when
* the caller cleans up by calling hv_synic_free()
*/
return ret;
}
I'm going to use the below (i.e. v5 + the above extra changes) in v6.
Please let me know if there is still any bug.
@@ -116,6 +117,7 @@ int hv_synic_alloc(void)
{
int cpu;
struct hv_per_cpu_context *hv_cpu;
+ int ret = -ENOMEM;
/*
* First, zero all per-cpu memory areas so hv_synic_free() can
@@ -156,9 +158,42 @@ int hv_synic_alloc(void)
(void *)get_zeroed_page(GFP_ATOMIC);
if (hv_cpu->synic_event_page == NULL) {
pr_err("Unable to allocate SYNIC event page\n");
+
+ free_page((unsigned long)hv_cpu->synic_message_page);
+ hv_cpu->synic_message_page = NULL;
+
goto err;
}
}
+
+ /* It's better to leak the page if the decryption fails. */
+ if (hv_isolation_type_tdx()) {
+ ret = set_memory_decrypted(
+ (unsigned long)hv_cpu->synic_message_page, 1);
+ if (ret) {
+ pr_err("Failed to decrypt SYNIC msg page\n");
+ hv_cpu->synic_message_page = NULL;
+
+ /*
+ * Free the event page so that a TDX VM won't
+ * try to encrypt the page in hv_synic_free().
+ */
+ free_page((unsigned long)hv_cpu->synic_event_page);
+ hv_cpu->synic_event_page = NULL;
+ goto err;
+ }
+
+ ret = set_memory_decrypted(
+ (unsigned long)hv_cpu->synic_event_page, 1);
+ if (ret) {
+ pr_err("Failed to decrypt SYNIC event page\n");
+ hv_cpu->synic_event_page = NULL;
+ goto err;
+ }
+
+ memset(hv_cpu->synic_message_page, 0, PAGE_SIZE);
+ memset(hv_cpu->synic_event_page, 0, PAGE_SIZE);
+ }
}
return 0;
@@ -167,18 +202,40 @@ int hv_synic_alloc(void)
* Any memory allocations that succeeded will be freed when
* the caller cleans up by calling hv_synic_free()
*/
- return -ENOMEM;
+ return ret;
}
void hv_synic_free(void)
{
int cpu;
+ int ret;
for_each_present_cpu(cpu) {
struct hv_per_cpu_context *hv_cpu
= per_cpu_ptr(hv_context.cpu_context, cpu);
+ /* It's better to leak the page if the encryption fails. */
+ if (hv_isolation_type_tdx()) {
+ if (hv_cpu->synic_message_page) {
+ ret = set_memory_encrypted((unsigned long)
+ hv_cpu->synic_message_page, 1);
+ if (ret) {
+ pr_err("Failed to encrypt SYNIC msg page\n");
+ hv_cpu->synic_message_page = NULL;
+ }
+ }
+
+ if (hv_cpu->synic_event_page) {
+ ret = set_memory_encrypted((unsigned long)
+ hv_cpu->synic_event_page, 1);
+ if (ret) {
+ pr_err("Failed to encrypt SYNIC event page\n");
+ hv_cpu->synic_event_page = NULL;
+ }
+ }
+ }
+
free_page((unsigned long)hv_cpu->synic_event_page);
free_page((unsigned long)hv_cpu->synic_message_page);
}
I'll post a separate patch (currently if hv_synic_alloc() --> get_zeroed_page() fails,
hv_context.hv_numa_map is leaked):
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -1515,27 +1515,27 @@ static int vmbus_bus_init(void)
}
ret = hv_synic_alloc();
if (ret)
goto err_alloc;
/*
* Initialize the per-cpu interrupt state and stimer state.
* Then connect to the host.
*/
ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "hyperv/vmbus:online",
hv_synic_init, hv_synic_cleanup);
if (ret < 0)
- goto err_cpuhp;
+ goto err_alloc;
hyperv_cpuhp_online = ret;
ret = vmbus_connect();
if (ret)
goto err_connect;
if (hv_is_isolation_supported())
sysctl_record_panic_msg = 0;
/*
* Only register if the crash MSRs are available
*/
if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) {
@@ -1567,29 +1567,28 @@ static int vmbus_bus_init(void)
/*
* Always register the vmbus unload panic notifier because we
* need to shut the VMbus channel connection on panic.
*/
atomic_notifier_chain_register(&panic_notifier_list,
&hyperv_panic_vmbus_unload_block);
vmbus_request_offers();
return 0;
err_connect:
cpuhp_remove_state(hyperv_cpuhp_online);
-err_cpuhp:
- hv_synic_free();
err_alloc:
+ hv_synic_free();
if (vmbus_irq == -1) {
hv_remove_vmbus_handler();
} else {
free_percpu_irq(vmbus_irq, vmbus_evt);
free_percpu(vmbus_evt);
}
err_setup:
bus_unregister(&hv_bus);
unregister_sysctl_table(hv_ctl_table_hdr);
hv_ctl_table_hdr = NULL;
return ret;
}
Powered by blists - more mailing lists