[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250714221545.5615-16-romank@linux.microsoft.com>
Date: Mon, 14 Jul 2025 15:15:44 -0700
From: Roman Kisel <romank@...ux.microsoft.com>
To: alok.a.tiwari@...cle.com,
arnd@...db.de,
bp@...en8.de,
corbet@....net,
dave.hansen@...ux.intel.com,
decui@...rosoft.com,
haiyangz@...rosoft.com,
hpa@...or.com,
kys@...rosoft.com,
mhklinux@...look.com,
mingo@...hat.com,
rdunlap@...radead.org,
tglx@...utronix.de,
Tianyu.Lan@...rosoft.com,
wei.liu@...nel.org,
linux-arch@...r.kernel.org,
linux-coco@...ts.linux.dev,
linux-doc@...r.kernel.org,
linux-hyperv@...r.kernel.org,
linux-kernel@...r.kernel.org,
x86@...nel.org
Cc: apais@...rosoft.com,
benhill@...rosoft.com,
bperkins@...rosoft.com,
sunilmut@...rosoft.com
Subject: [PATCH hyperv-next v4 15/16] Drivers: hv: Support establishing the confidential VMBus connection
To establish the confidential VMBus connection, the CoCo VM guest
first attempts to connect to the VMBus server run by the paravisor.
If that fails, the guest falls back to the non-confidential VMBus.
Implement that in the VMBus driver initialization.
Signed-off-by: Roman Kisel <romank@...ux.microsoft.com>
---
drivers/hv/vmbus_drv.c | 189 ++++++++++++++++++++++++++++-------------
1 file changed, 130 insertions(+), 59 deletions(-)
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 13aca5abc7d8..53be3157e22c 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -1057,12 +1057,9 @@ static void vmbus_onmessage_work(struct work_struct *work)
kfree(ctx);
}
-void vmbus_on_msg_dpc(unsigned long data)
+static void __vmbus_on_msg_dpc(void *message_page_addr)
{
- struct hv_per_cpu_context *hv_cpu = (void *)data;
- void *page_addr = hv_cpu->hyp_synic_message_page;
- struct hv_message msg_copy, *msg = (struct hv_message *)page_addr +
- VMBUS_MESSAGE_SINT;
+ struct hv_message msg_copy, *msg;
struct vmbus_channel_message_header *hdr;
enum vmbus_channel_message_type msgtype;
const struct vmbus_channel_message_table_entry *entry;
@@ -1070,6 +1067,10 @@ void vmbus_on_msg_dpc(unsigned long data)
__u8 payload_size;
u32 message_type;
+ if (!message_page_addr)
+ return;
+ msg = (struct hv_message *)message_page_addr + VMBUS_MESSAGE_SINT;
+
/*
* 'enum vmbus_channel_message_type' is supposed to always be 'u32' as
* it is being used in 'struct vmbus_channel_message_header' definition
@@ -1195,6 +1196,14 @@ void vmbus_on_msg_dpc(unsigned long data)
vmbus_signal_eom(msg, message_type);
}
+void vmbus_on_msg_dpc(unsigned long data)
+{
+ struct hv_per_cpu_context *hv_cpu = (void *)data;
+
+ __vmbus_on_msg_dpc(hv_cpu->hyp_synic_message_page);
+ __vmbus_on_msg_dpc(hv_cpu->para_synic_message_page);
+}
+
#ifdef CONFIG_PM_SLEEP
/*
* Fake RESCIND_CHANNEL messages to clean up hv_sock channels by force for
@@ -1233,21 +1242,19 @@ static void vmbus_force_channel_rescinded(struct vmbus_channel *channel)
#endif /* CONFIG_PM_SLEEP */
/*
- * Schedule all channels with events pending
+ * Schedule all channels with events pending.
+ * The event page can be directly checked to get the id of
+ * the channel that has the interrupt pending.
*/
-static void vmbus_chan_sched(struct hv_per_cpu_context *hv_cpu)
+static void vmbus_chan_sched(void *event_page_addr)
{
unsigned long *recv_int_page;
u32 maxbits, relid;
+ union hv_synic_event_flags *event;
- /*
- * The event page can be directly checked to get the id of
- * the channel that has the interrupt pending.
- */
- void *page_addr = hv_cpu->hyp_synic_event_page;
- union hv_synic_event_flags *event
- = (union hv_synic_event_flags *)page_addr +
- VMBUS_MESSAGE_SINT;
+ if (!event_page_addr)
+ return;
+ event = (union hv_synic_event_flags *)event_page_addr + VMBUS_MESSAGE_SINT;
maxbits = HV_EVENT_FLAGS_COUNT;
recv_int_page = event->flags;
@@ -1255,6 +1262,11 @@ static void vmbus_chan_sched(struct hv_per_cpu_context *hv_cpu)
if (unlikely(!recv_int_page))
return;
+ /*
+ * Suggested-by: Michael Kelley <mhklinux@...look.com>
+ * One possible optimization would be to keep track of the largest relID that's in use,
+ * and only scan up to that relID.
+ */
for_each_set_bit(relid, recv_int_page, maxbits) {
void (*callback_fn)(void *context);
struct vmbus_channel *channel;
@@ -1318,26 +1330,35 @@ static void vmbus_chan_sched(struct hv_per_cpu_context *hv_cpu)
}
}
-static void vmbus_isr(void)
+static void vmbus_message_sched(struct hv_per_cpu_context *hv_cpu, void *message_page_addr)
{
- struct hv_per_cpu_context *hv_cpu
- = this_cpu_ptr(hv_context.cpu_context);
- void *page_addr;
struct hv_message *msg;
- vmbus_chan_sched(hv_cpu);
-
- page_addr = hv_cpu->hyp_synic_message_page;
- msg = (struct hv_message *)page_addr + VMBUS_MESSAGE_SINT;
+ if (!message_page_addr)
+ return;
+ msg = (struct hv_message *)message_page_addr + VMBUS_MESSAGE_SINT;
/* Check if there are actual msgs to be processed */
if (msg->header.message_type != HVMSG_NONE) {
if (msg->header.message_type == HVMSG_TIMER_EXPIRED) {
hv_stimer0_isr();
vmbus_signal_eom(msg, HVMSG_TIMER_EXPIRED);
- } else
+ } else {
tasklet_schedule(&hv_cpu->msg_dpc);
+ }
}
+}
+
+static void vmbus_isr(void)
+{
+ struct hv_per_cpu_context *hv_cpu
+ = this_cpu_ptr(hv_context.cpu_context);
+
+ vmbus_chan_sched(hv_cpu->hyp_synic_event_page);
+ vmbus_chan_sched(hv_cpu->para_synic_event_page);
+
+ vmbus_message_sched(hv_cpu, hv_cpu->hyp_synic_message_page);
+ vmbus_message_sched(hv_cpu, hv_cpu->para_synic_message_page);
add_interrupt_randomness(vmbus_interrupt);
}
@@ -1355,6 +1376,59 @@ static void vmbus_percpu_work(struct work_struct *work)
hv_synic_init(cpu);
}
+static int vmbus_alloc_synic_and_connect(void)
+{
+ int ret, cpu;
+ struct work_struct __percpu *works;
+ int hyperv_cpuhp_online;
+
+ ret = hv_synic_alloc();
+ if (ret < 0)
+ goto err_alloc;
+
+ works = alloc_percpu(struct work_struct);
+ if (!works) {
+ ret = -ENOMEM;
+ goto err_alloc;
+ }
+
+ /*
+ * Initialize the per-cpu interrupt state and stimer state.
+ * Then connect to the host.
+ */
+ cpus_read_lock();
+ for_each_online_cpu(cpu) {
+ struct work_struct *work = per_cpu_ptr(works, cpu);
+
+ INIT_WORK(work, vmbus_percpu_work);
+ schedule_work_on(cpu, work);
+ }
+
+ for_each_online_cpu(cpu)
+ flush_work(per_cpu_ptr(works, cpu));
+
+ /* Register the callbacks for possible CPU online/offline'ing */
+ ret = cpuhp_setup_state_nocalls_cpuslocked(CPUHP_AP_ONLINE_DYN, "hyperv/vmbus:online",
+ hv_synic_init, hv_synic_cleanup);
+ cpus_read_unlock();
+ free_percpu(works);
+ if (ret < 0)
+ goto err_alloc;
+ hyperv_cpuhp_online = ret;
+
+ ret = vmbus_connect();
+ if (ret)
+ goto err_connect;
+ return 0;
+
+err_connect:
+ cpuhp_remove_state(hyperv_cpuhp_online);
+ return -ENODEV;
+err_alloc:
+ hv_synic_free();
+ return -ENOMEM;
+}
+
/*
* vmbus_bus_init -Main vmbus driver initialization routine.
*
@@ -1365,8 +1439,7 @@ static void vmbus_percpu_work(struct work_struct *work)
*/
static int vmbus_bus_init(void)
{
- int ret, cpu;
- struct work_struct __percpu *works;
+ int ret;
ret = hv_init();
if (ret != 0) {
@@ -1401,41 +1474,42 @@ static int vmbus_bus_init(void)
}
}
- ret = hv_synic_alloc();
- if (ret)
- goto err_alloc;
-
- works = alloc_percpu(struct work_struct);
- if (!works) {
- ret = -ENOMEM;
- goto err_alloc;
- }
-
/*
- * Initialize the per-cpu interrupt state and stimer state.
- * Then connect to the host.
+ * Attempt to establish the confidential VMBus connection first if this VM is
+ * a hardware confidential VM, and the paravisor is present.
+ *
+ * All scenarios here are:
+ * 1. No paravisor,
+ * 2. Paravisor without VMBus relay, no hardware isolation,
+ * 3. Paravisor without VMBus relay, with hardware isolation,
+ * 4. Paravisor with VMBus relay, no hardware isolation,
+ * 5. Paravisor with VMBus relay, with hardware isolation.
+ *
+ * In the cloud, scenarios 1, 4, and 5 are the most common, and outside the cloud,
+ * scenario 1 should be the most common at the moment. The detection of Confidential
+ * VMBus support below takes that into account by running `vmbus_alloc_synic_and_connect()`
+ * only once (barring any faults not related to VMBus) in these cases. That is true
+ * for scenario 2, too, although it might not be as feature-rich as 1, 4, and 5.
+ *
+ * However, the code will do much more work in scenario 3, where it first has to
+ * initialize lots of structures for every CPU, only to likely tear them down later
+ * and start again, now without attempting to use Confidential VMBus, thus taking a
+ * performance hit. Such systems are rather uncommon today, don't support more than
+ * ~300 CPUs, and are rarely used with many dozens of CPUs. As time goes on, that
+ * will be even less common. Hence, the preference is to not specialize the code for
+ * that scenario.
*/
- cpus_read_lock();
- for_each_online_cpu(cpu) {
- struct work_struct *work = per_cpu_ptr(works, cpu);
+ ret = -ENODEV;
+ if (ms_hyperv.paravisor_present && (hv_isolation_type_tdx() || hv_isolation_type_snp())) {
+ is_confidential = true;
+ ret = vmbus_alloc_synic_and_connect();
+ is_confidential = !ret;
- INIT_WORK(work, vmbus_percpu_work);
- schedule_work_on(cpu, work);
+ pr_info("VMBus is confidential: %d\n", is_confidential);
}
- for_each_online_cpu(cpu)
- flush_work(per_cpu_ptr(works, cpu));
-
- /* Register the callbacks for possible CPU online/offline'ing */
- ret = cpuhp_setup_state_nocalls_cpuslocked(CPUHP_AP_ONLINE_DYN, "hyperv/vmbus:online",
- hv_synic_init, hv_synic_cleanup);
- cpus_read_unlock();
- free_percpu(works);
- if (ret < 0)
- goto err_alloc;
- hyperv_cpuhp_online = ret;
-
- ret = vmbus_connect();
+ if (!is_confidential)
+ ret = vmbus_alloc_synic_and_connect();
if (ret)
goto err_connect;
@@ -1451,9 +1525,6 @@ static int vmbus_bus_init(void)
return 0;
err_connect:
- cpuhp_remove_state(hyperv_cpuhp_online);
-err_alloc:
- hv_synic_free();
if (vmbus_irq == -1) {
hv_remove_vmbus_handler();
} else {
--
2.43.0
Powered by blists - more mailing lists