[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250110200507.120452-1-hamzamahfooz@linux.microsoft.com>
Date: Fri, 10 Jan 2025 15:05:06 -0500
From: Hamza Mahfooz <hamzamahfooz@...ux.microsoft.com>
To: linux-hyperv@...r.kernel.org
Cc: Hamza Mahfooz <hamzamahfooz@...ux.microsoft.com>,
Boqun Feng <boqun.feng@...il.com>,
Wei Liu <wei.liu@...nel.org>,
"K. Y. Srinivasan" <kys@...rosoft.com>,
Haiyang Zhang <haiyangz@...rosoft.com>,
Dexuan Cui <decui@...rosoft.com>,
linux-kernel@...r.kernel.org
Subject: [PATCH] drivers/hv: add CPU offlining support
Currently, it is effectively impossible to offline CPUs. Since, most
CPUs will have vmbus channels attached to them. So, as made mention of
in commit d570aec0f2154 ("Drivers: hv: vmbus: Synchronize
init_vp_index() vs. CPU hotplug"), rebind channels associated with CPUs
that a user is trying to offline to a new "randomly" selected CPU.
Cc: Boqun Feng <boqun.feng@...il.com>
Cc: Wei Liu <wei.liu@...nel.org>
Signed-off-by: Hamza Mahfooz <hamzamahfooz@...ux.microsoft.com>
---
drivers/hv/hv.c | 57 +++++++++++++++++++++++++++++++-----------
drivers/hv/vmbus_drv.c | 51 +++++++++++++++++++++----------------
include/linux/hyperv.h | 1 +
3 files changed, 73 insertions(+), 36 deletions(-)
diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
index 36d9ba097ff5..42270a7a7a19 100644
--- a/drivers/hv/hv.c
+++ b/drivers/hv/hv.c
@@ -433,13 +433,40 @@ static bool hv_synic_event_pending(void)
return pending;
}
+static int hv_pick_new_cpu(struct vmbus_channel *channel,
+ unsigned int current_cpu)
+{
+ int ret = 0;
+ int cpu;
+
+ lockdep_assert_held(&vmbus_connection.channel_mutex);
+
+ /*
+ * We can't assume that the relevant interrupts will be sent before
+ * the cpu is offlined on older versions of hyperv.
+ */
+ if (vmbus_proto_version < VERSION_WIN10_V5_3)
+ return -EBUSY;
+
+ cpus_read_lock();
+ cpu = cpumask_next(get_random_u32_below(nr_cpu_ids), cpu_online_mask);
+
+ if (cpu >= nr_cpu_ids || cpu == current_cpu)
+ cpu = VMBUS_CONNECT_CPU;
+
+ ret = vmbus_channel_set_cpu(channel, cpu);
+ cpus_read_unlock();
+
+ return ret;
+}
+
/*
* hv_synic_cleanup - Cleanup routine for hv_synic_init().
*/
int hv_synic_cleanup(unsigned int cpu)
{
struct vmbus_channel *channel, *sc;
- bool channel_found = false;
+ int ret = 0;
if (vmbus_connection.conn_state != CONNECTED)
goto always_cleanup;
@@ -456,31 +483,31 @@ int hv_synic_cleanup(unsigned int cpu)
/*
* Search for channels which are bound to the CPU we're about to
- * cleanup. In case we find one and vmbus is still connected, we
- * fail; this will effectively prevent CPU offlining.
- *
- * TODO: Re-bind the channels to different CPUs.
+ * cleanup.
*/
mutex_lock(&vmbus_connection.channel_mutex);
list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
if (channel->target_cpu == cpu) {
- channel_found = true;
- break;
+ ret = hv_pick_new_cpu(channel, cpu);
+
+ if (ret) {
+ mutex_unlock(&vmbus_connection.channel_mutex);
+ return ret;
+ }
}
list_for_each_entry(sc, &channel->sc_list, sc_list) {
if (sc->target_cpu == cpu) {
- channel_found = true;
- break;
+ ret = hv_pick_new_cpu(channel, cpu);
+
+ if (ret) {
+ mutex_unlock(&vmbus_connection.channel_mutex);
+ return ret;
+ }
}
}
- if (channel_found)
- break;
}
mutex_unlock(&vmbus_connection.channel_mutex);
- if (channel_found)
- return -EBUSY;
-
/*
* channel_found == false means that any channels that were previously
* assigned to the CPU have been reassigned elsewhere with a call of
@@ -497,5 +524,5 @@ int hv_synic_cleanup(unsigned int cpu)
hv_synic_disable_regs(cpu);
- return 0;
+ return ret;
}
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 2892b8da20a5..c256e02fa66b 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -1611,16 +1611,15 @@ static ssize_t target_cpu_show(struct vmbus_channel *channel, char *buf)
{
return sprintf(buf, "%u\n", channel->target_cpu);
}
-static ssize_t target_cpu_store(struct vmbus_channel *channel,
- const char *buf, size_t count)
+
+int vmbus_channel_set_cpu(struct vmbus_channel *channel, u32 target_cpu)
{
- u32 target_cpu, origin_cpu;
- ssize_t ret = count;
+ u32 origin_cpu;
+ int ret = 0;
- if (vmbus_proto_version < VERSION_WIN10_V4_1)
- return -EIO;
+ lockdep_assert_held(&vmbus_connection.channel_mutex);
- if (sscanf(buf, "%uu", &target_cpu) != 1)
+ if (vmbus_proto_version < VERSION_WIN10_V4_1)
return -EIO;
/* Validate target_cpu for the cpumask_test_cpu() operation below. */
@@ -1630,22 +1629,17 @@ static ssize_t target_cpu_store(struct vmbus_channel *channel,
if (!cpumask_test_cpu(target_cpu, housekeeping_cpumask(HK_TYPE_MANAGED_IRQ)))
return -EINVAL;
- /* No CPUs should come up or down during this. */
- cpus_read_lock();
-
- if (!cpu_online(target_cpu)) {
- cpus_read_unlock();
+ if (!cpu_online(target_cpu))
return -EINVAL;
- }
/*
- * Synchronizes target_cpu_store() and channel closure:
+ * Synchronizes vmbus_channel_set_cpu() and channel closure:
*
* { Initially: state = CHANNEL_OPENED }
*
* CPU1 CPU2
*
- * [target_cpu_store()] [vmbus_disconnect_ring()]
+ * [vmbus_channel_set_cpu()] [vmbus_disconnect_ring()]
*
* LOCK channel_mutex LOCK channel_mutex
* LOAD r1 = state LOAD r2 = state
@@ -1660,7 +1654,6 @@ static ssize_t target_cpu_store(struct vmbus_channel *channel,
* Note. The host processes the channel messages "sequentially", in
* the order in which they are received on a per-partition basis.
*/
- mutex_lock(&vmbus_connection.channel_mutex);
/*
* Hyper-V will ignore MODIFYCHANNEL messages for "non-open" channels;
@@ -1668,17 +1661,17 @@ static ssize_t target_cpu_store(struct vmbus_channel *channel,
*/
if (channel->state != CHANNEL_OPENED_STATE) {
ret = -EIO;
- goto cpu_store_unlock;
+ goto end;
}
origin_cpu = channel->target_cpu;
if (target_cpu == origin_cpu)
- goto cpu_store_unlock;
+ goto end;
if (vmbus_send_modifychannel(channel,
hv_cpu_number_to_vp_number(target_cpu))) {
ret = -EIO;
- goto cpu_store_unlock;
+ goto end;
}
/*
@@ -1708,9 +1701,25 @@ static ssize_t target_cpu_store(struct vmbus_channel *channel,
origin_cpu, target_cpu);
}
-cpu_store_unlock:
- mutex_unlock(&vmbus_connection.channel_mutex);
+end:
+ return ret;
+}
+
+static ssize_t target_cpu_store(struct vmbus_channel *channel,
+ const char *buf, size_t count)
+{
+ ssize_t ret = count;
+ u32 target_cpu;
+
+ if (sscanf(buf, "%uu", &target_cpu) != 1)
+ return -EIO;
+
+ mutex_lock(&vmbus_connection.channel_mutex);
+ cpus_read_lock();
+ ret = vmbus_channel_set_cpu(channel, target_cpu);
cpus_read_unlock();
+ mutex_unlock(&vmbus_connection.channel_mutex);
+
return ret;
}
static VMBUS_CHAN_ATTR(cpu, 0644, target_cpu_show, target_cpu_store);
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 02a226bcf0ed..25e9e982f1b0 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -1670,6 +1670,7 @@ int vmbus_send_tl_connect_request(const guid_t *shv_guest_servie_id,
const guid_t *shv_host_servie_id);
int vmbus_send_modifychannel(struct vmbus_channel *channel, u32 target_vp);
void vmbus_set_event(struct vmbus_channel *channel);
+int vmbus_channel_set_cpu(struct vmbus_channel *channel, u32 target_cpu);
/* Get the start of the ring buffer. */
static inline void *
--
2.47.1
Powered by blists - more mailing lists