[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID:
<SN6PR02MB4157BB7BD371A1144E184EB9D45CA@SN6PR02MB4157.namprd02.prod.outlook.com>
Date: Tue, 22 Jul 2025 17:44:11 +0000
From: Michael Kelley <mhklinux@...look.com>
To: Roman Kisel <romank@...ux.microsoft.com>, "alok.a.tiwari@...cle.com"
<alok.a.tiwari@...cle.com>, "arnd@...db.de" <arnd@...db.de>, "bp@...en8.de"
<bp@...en8.de>, "corbet@....net" <corbet@....net>,
"dave.hansen@...ux.intel.com" <dave.hansen@...ux.intel.com>,
"decui@...rosoft.com" <decui@...rosoft.com>, "haiyangz@...rosoft.com"
<haiyangz@...rosoft.com>, "hpa@...or.com" <hpa@...or.com>,
"kys@...rosoft.com" <kys@...rosoft.com>, "mingo@...hat.com"
<mingo@...hat.com>, "rdunlap@...radead.org" <rdunlap@...radead.org>,
"tglx@...utronix.de" <tglx@...utronix.de>, "Tianyu.Lan@...rosoft.com"
<Tianyu.Lan@...rosoft.com>, "wei.liu@...nel.org" <wei.liu@...nel.org>,
"linux-arch@...r.kernel.org" <linux-arch@...r.kernel.org>,
"linux-coco@...ts.linux.dev" <linux-coco@...ts.linux.dev>,
"linux-doc@...r.kernel.org" <linux-doc@...r.kernel.org>,
"linux-hyperv@...r.kernel.org" <linux-hyperv@...r.kernel.org>,
"linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
"x86@...nel.org" <x86@...nel.org>
CC: "apais@...rosoft.com" <apais@...rosoft.com>, "benhill@...rosoft.com"
<benhill@...rosoft.com>, "bperkins@...rosoft.com" <bperkins@...rosoft.com>,
"sunilmut@...rosoft.com" <sunilmut@...rosoft.com>
Subject: RE: [PATCH hyperv-next v4 06/16] Drivers: hv: Allocate the paravisor
SynIC pages when required
From: Roman Kisel <romank@...ux.microsoft.com> Sent: Monday, July 14, 2025 3:16 PM
>
> Confidential VMBus requires interacting with two SynICs -- one
> provided by the host hypervisor, and one provided by the paravisor.
> Each SynIC requires its own message and event pages.
>
> Refactor and extend the existing code to add allocating and freeing
> the message and event pages for the paravisor SynIC when it is
> present.
>
> Signed-off-by: Roman Kisel <romank@...ux.microsoft.com>
Modulo a nit called out below,
Reviewed-by: Michael Kelley <mhklinux@...look.com>
> ---
> drivers/hv/hv.c | 184 +++++++++++++++++++-------------------
> drivers/hv/hyperv_vmbus.h | 18 ++++
> 2 files changed, 112 insertions(+), 90 deletions(-)
>
> diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
> index 964b9102477d..e9ee0177d765 100644
> --- a/drivers/hv/hv.c
> +++ b/drivers/hv/hv.c
> @@ -94,10 +94,70 @@ int hv_post_message(union hv_connection_id connection_id,
> return hv_result(status);
> }
>
> +static int hv_alloc_page(void **page, bool decrypt, const char *note)
> +{
> + int ret = 0;
> +
> + /*
> + * After the page changes its encryption status, its contents might
> + * appear scrambled on some hardware. Thus `get_zeroed_page` would
> + * zero the page out in vain, so do that explicitly exactly once.
> + *
> + * By default, the page is allocated encrypted in a CoCo VM.
> + */
> + *page = (void *)__get_free_page(GFP_KERNEL);
> + if (!*page)
> + return -ENOMEM;
> +
> + if (decrypt)
> + ret = set_memory_decrypted((unsigned long)*page, 1);
> + if (ret)
> + goto failed;
> +
> + memset(*page, 0, PAGE_SIZE);
> + return 0;
> +
> +failed:
> + /*
> + * Report the failure but don't put the page back on the free list as
> + * its encryption status is unknown.
> + */
> + pr_err("allocation failed for %s page, error %d, decrypted %d\n",
> + note, ret, decrypt);
> + *page = NULL;
> + return ret;
> +}
> +
> +static int hv_free_page(void **page, bool encrypt, const char *note)
> +{
> + int ret = 0;
> +
> + if (!*page)
> + return 0;
> +
> + if (encrypt)
> + ret = set_memory_encrypted((unsigned long)*page, 1);
> +
> + /*
> + * In the case of the failure, the page is leaked. Something is wrong,
> + * prefer to lose the page with the unknown encryption status and stay afloat.
> + */
> + if (ret) {
> + pr_err("deallocation failed for %s page, error %d, encrypt %d\n",
> + note, ret, encrypt);
> + } else
Nit: The braces here are unnecessary.
> + free_page((unsigned long)*page);
> +
> + *page = NULL;
> +
> + return ret;
> +}
> +
> int hv_synic_alloc(void)
> {
> int cpu, ret = -ENOMEM;
> struct hv_per_cpu_context *hv_cpu;
> + const bool decrypt = !vmbus_is_confidential();
>
> /*
> * First, zero all per-cpu memory areas so hv_synic_free() can
> @@ -123,73 +183,37 @@ int hv_synic_alloc(void)
> vmbus_on_msg_dpc, (unsigned long)hv_cpu);
>
> if (ms_hyperv.paravisor_present && hv_isolation_type_tdx()) {
> - hv_cpu->post_msg_page = (void *)get_zeroed_page(GFP_ATOMIC);
> - if (!hv_cpu->post_msg_page) {
> - pr_err("Unable to allocate post msg page\n");
> + ret = hv_alloc_page(&hv_cpu->post_msg_page,
> + decrypt, "post msg");
> + if (ret)
> goto err;
> - }
> -
> - ret = set_memory_decrypted((unsigned long)hv_cpu->post_msg_page, 1);
> - if (ret) {
> - pr_err("Failed to decrypt post msg page: %d\n", ret);
> - /* Just leak the page, as it's unsafe to free the page. */
> - hv_cpu->post_msg_page = NULL;
> - goto err;
> - }
> -
> - memset(hv_cpu->post_msg_page, 0, PAGE_SIZE);
> }
>
> /*
> - * Synic message and event pages are allocated by paravisor.
> - * Skip these pages allocation here.
> + * If these SynIC pages are not allocated, SIEF and SIM pages
> + * are configured using what the root partition or the paravisor
> + * provides upon reading the SIEFP and SIMP registers.
> */
> if (!ms_hyperv.paravisor_present && !hv_root_partition()) {
> - hv_cpu->hyp_synic_message_page =
> - (void *)get_zeroed_page(GFP_ATOMIC);
> - if (!hv_cpu->hyp_synic_message_page) {
> - pr_err("Unable to allocate SYNIC message page\n");
> + ret = hv_alloc_page(&hv_cpu->hyp_synic_message_page,
> + decrypt, "hypervisor SynIC msg");
> + if (ret)
> goto err;
> - }
> -
> - hv_cpu->hyp_synic_event_page =
> - (void *)get_zeroed_page(GFP_ATOMIC);
> - if (!hv_cpu->hyp_synic_event_page) {
> - pr_err("Unable to allocate SYNIC event page\n");
> -
> - free_page((unsigned long)hv_cpu->hyp_synic_message_page);
> - hv_cpu->hyp_synic_message_page = NULL;
> + ret = hv_alloc_page(&hv_cpu->hyp_synic_event_page,
> + decrypt, "hypervisor SynIC event");
> + if (ret)
> goto err;
> - }
> }
>
> - if (!ms_hyperv.paravisor_present &&
> - (hv_isolation_type_snp() || hv_isolation_type_tdx())) {
> - ret = set_memory_decrypted((unsigned long)
> - hv_cpu->hyp_synic_message_page, 1);
> - if (ret) {
> - pr_err("Failed to decrypt SYNIC msg page: %d\n", ret);
> - hv_cpu->hyp_synic_message_page = NULL;
> -
> - /*
> - * Free the event page here so that hv_synic_free()
> - * won't later try to re-encrypt it.
> - */
> - free_page((unsigned long)hv_cpu->hyp_synic_event_page);
> - hv_cpu->hyp_synic_event_page = NULL;
> + if (vmbus_is_confidential()) {
> + ret = hv_alloc_page(&hv_cpu->para_synic_message_page,
> + false, "paravisor SynIC msg");
> + if (ret)
> goto err;
> - }
> -
> - ret = set_memory_decrypted((unsigned long)
> - hv_cpu->hyp_synic_event_page, 1);
> - if (ret) {
> - pr_err("Failed to decrypt SYNIC event page: %d\n", ret);
> - hv_cpu->hyp_synic_event_page = NULL;
> + ret = hv_alloc_page(&hv_cpu->para_synic_event_page,
> + false, "paravisor SynIC event");
> + if (ret)
> goto err;
> - }
> -
> - memset(hv_cpu->hyp_synic_message_page, 0, PAGE_SIZE);
> - memset(hv_cpu->hyp_synic_event_page, 0, PAGE_SIZE);
> }
> }
>
> @@ -205,48 +229,28 @@ int hv_synic_alloc(void)
>
> void hv_synic_free(void)
> {
> - int cpu, ret;
> + int cpu;
> + const bool encrypt = !vmbus_is_confidential();
>
> for_each_present_cpu(cpu) {
> struct hv_per_cpu_context *hv_cpu =
> per_cpu_ptr(hv_context.cpu_context, cpu);
>
> - /* It's better to leak the page if the encryption fails. */
> - if (ms_hyperv.paravisor_present && hv_isolation_type_tdx()) {
> - if (hv_cpu->post_msg_page) {
> - ret = set_memory_encrypted((unsigned long)
> - hv_cpu->post_msg_page, 1);
> - if (ret) {
> - pr_err("Failed to encrypt post msg page: %d\n", ret);
> - hv_cpu->post_msg_page = NULL;
> - }
> - }
> + if (ms_hyperv.paravisor_present && hv_isolation_type_tdx())
> + hv_free_page(&hv_cpu->post_msg_page,
> + encrypt, "post msg");
> + if (!ms_hyperv.paravisor_present && !hv_root_partition()) {
> + hv_free_page(&hv_cpu->hyp_synic_event_page,
> + encrypt, "hypervisor SynIC event");
> + hv_free_page(&hv_cpu->hyp_synic_message_page,
> + encrypt, "hypervisor SynIC msg");
> }
> -
> - if (!ms_hyperv.paravisor_present &&
> - (hv_isolation_type_snp() || hv_isolation_type_tdx())) {
> - if (hv_cpu->hyp_synic_message_page) {
> - ret = set_memory_encrypted((unsigned long)
> - hv_cpu->hyp_synic_message_page, 1);
> - if (ret) {
> - pr_err("Failed to encrypt SYNIC msg page: %d\n", ret);
> - hv_cpu->hyp_synic_message_page = NULL;
> - }
> - }
> -
> - if (hv_cpu->hyp_synic_event_page) {
> - ret = set_memory_encrypted((unsigned long)
> - hv_cpu->hyp_synic_event_page, 1);
> - if (ret) {
> - pr_err("Failed to encrypt SYNIC event page: %d\n", ret);
> - hv_cpu->hyp_synic_event_page = NULL;
> - }
> - }
> + if (vmbus_is_confidential()) {
> + hv_free_page(&hv_cpu->para_synic_event_page,
> + false, "paravisor SynIC event");
> + hv_free_page(&hv_cpu->para_synic_message_page,
> + false, "paravisor SynIC msg");
> }
> -
> - free_page((unsigned long)hv_cpu->post_msg_page);
> - free_page((unsigned long)hv_cpu->hyp_synic_event_page);
> - free_page((unsigned long)hv_cpu->hyp_synic_message_page);
> }
>
> kfree(hv_context.hv_numa_map);
> diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
> index fc3cdb26ff1a..16b5cf1bca19 100644
> --- a/drivers/hv/hyperv_vmbus.h
> +++ b/drivers/hv/hyperv_vmbus.h
> @@ -120,8 +120,26 @@ enum {
> * Per cpu state for channel handling
> */
> struct hv_per_cpu_context {
> + /*
> + * SynIC pages for communicating with the host.
> + *
> + * These pages are accessible to the host partition and the hypervisor.
> + * They may be used for exchanging data with the host partition and the
> + * hypervisor even when they aren't trusted yet the guest partition
> + * must be prepared to handle the malicious behavior.
> + */
> void *hyp_synic_message_page;
> void *hyp_synic_event_page;
> + /*
> + * SynIC pages for communicating with the paravisor.
> + *
> + * These pages may be accessed from within the guest partition only in
> + * CoCo VMs. Neither the host partition nor the hypervisor can access
> + * these pages in that case; they are used for exchanging data with the
> + * paravisor.
> + */
> + void *para_synic_message_page;
> + void *para_synic_event_page;
>
> /*
> * The page is only used in hv_post_message() for a TDX VM (with the
> --
> 2.43.0
Powered by blists - more mailing lists