Message-Id: <1760727497-21158-1-git-send-email-nunodasneves@linux.microsoft.com>
Date: Fri, 17 Oct 2025 11:58:17 -0700
From: Nuno Das Neves <nunodasneves@...ux.microsoft.com>
To: linux-hyperv@...r.kernel.org,
linux-kernel@...r.kernel.org,
mhklinux@...look.com
Cc: kys@...rosoft.com,
haiyangz@...rosoft.com,
wei.liu@...nel.org,
decui@...rosoft.com,
arnd@...db.de,
mrathor@...ux.microsoft.com,
skinsburskii@...ux.microsoft.com,
Nuno Das Neves <nunodasneves@...ux.microsoft.com>
Subject: [PATCH v2] mshv: Fix deposit memory in MSHV_ROOT_HVCALL

When the MSHV_ROOT_HVCALL ioctl executes a hypercall and the hypercall
fails with HV_STATUS_INSUFFICIENT_MEMORY, the ioctl deposits memory
and then returns -EAGAIN to userspace. The expectation is that the VMM
will retry the ioctl. However, some VMM code in the wild doesn't do
this and simply fails.
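
For reference, this is roughly the retry loop the current behavior
forces every VMM to implement (a minimal userspace sketch; the struct
and ioctl names are from include/uapi/linux/mshv.h, but
vmm_do_hvcall() is a hypothetical wrapper, not code from any real
VMM):

	#include <sys/ioctl.h>
	#include <errno.h>
	#include <linux/mshv.h>

	/* Issue a passthrough hypercall on a partition fd, retrying
	 * while the kernel deposits memory on our behalf. A real VMM
	 * would also have to fix up args->reps between attempts for
	 * rep hypercalls.
	 */
	static int vmm_do_hvcall(int partition_fd,
				 struct mshv_root_hvcall *args)
	{
		int ret;

		do {
			ret = ioctl(partition_fd, MSHV_ROOT_HVCALL, args);
		} while (ret < 0 && errno == EAGAIN);

		return ret;
	}
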
Rather than force the VMM to retry, change the ioctl to deposit
memory on demand and immediately retry the hypercall as is done with
all the other hypercall helper functions.
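
Schematically, that deposit-and-retry pattern looks like this (a
sketch of the common shape, not a verbatim copy of any one helper):

	do {
		status = hv_do_hypercall(code, input_pg, output_pg);

		if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY)
			break;

		/* Give the hypervisor more memory, then try again */
		ret = hv_call_deposit_pages(NUMA_NO_NODE,
					    partition->pt_id, 1);
	} while (!ret);
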
In addition to making the ioctl easier to use, removing the need for
multiple syscalls improves performance.

There is a complication: unlike in the other hypercall helper
functions, in MSHV_ROOT_HVCALL the input is opaque to the kernel. This
is problematic for rep hypercalls, because the kernel can't copy the
next part of the input list into the input page on each iteration of
the deposit loop (this was the original reason for returning -EAGAIN
in this case).

Introduce hv_do_rep_hypercall_ex(), which adds a 'rep_start'
parameter. This solves the issue, allowing the deposit loop in
MSHV_ROOT_HVCALL to restart a rep hypercall after depositing pages
partway through.
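
To illustrate with made-up numbers: suppose a rep hypercall with 512
reps runs out of memory after completing 100 of them. The deposit loop
can now do the following, without ever touching the opaque input page:

	status = hv_do_rep_hypercall_ex(code, 512, 0, 0,
					input_pg, output_pg);
	/* hv_result(status) == HV_STATUS_INSUFFICIENT_MEMORY */
	reps_completed = hv_repcomp(status);	/* 100 */

	hv_call_deposit_pages(NUMA_NO_NODE, partition->pt_id, 1);

	/* Resume at rep index 100 of the same input page */
	status = hv_do_rep_hypercall_ex(code, 512, 0, reps_completed,
					input_pg, output_pg);

The hypervisor resumes at the index given in the 'rep start' field of
the hypercall control word, so the kernel never needs to understand or
rewrite the input list.
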
Signed-off-by: Nuno Das Neves <nunodasneves@...ux.microsoft.com>
---
Changes in v2:
- Improve commit message [Michael]
- Fix up some incorrect/incomplete comments [Michael]
---
 drivers/hv/mshv_root_main.c    | 58 ++++++++++++++++++----------------
 include/asm-generic/mshyperv.h | 17 ++++++++--
 2 files changed, 44 insertions(+), 31 deletions(-)

diff --git a/drivers/hv/mshv_root_main.c b/drivers/hv/mshv_root_main.c
index 9ae67c6e9f60..64e1bdf3b57c 100644
--- a/drivers/hv/mshv_root_main.c
+++ b/drivers/hv/mshv_root_main.c
@@ -159,6 +159,7 @@ static int mshv_ioctl_passthru_hvcall(struct mshv_partition *partition,
 	unsigned int pages_order;
 	void *input_pg = NULL;
 	void *output_pg = NULL;
+	u16 reps_completed;
 
 	if (copy_from_user(&args, user_args, sizeof(args)))
 		return -EFAULT;
@@ -210,41 +211,42 @@ static int mshv_ioctl_passthru_hvcall(struct mshv_partition *partition,
 	 */
 	*(u64 *)input_pg = partition->pt_id;
 
-	if (args.reps)
-		status = hv_do_rep_hypercall(args.code, args.reps, 0,
-					     input_pg, output_pg);
-	else
-		status = hv_do_hypercall(args.code, input_pg, output_pg);
-
-	if (hv_result(status) == HV_STATUS_CALL_PENDING) {
-		if (is_async) {
-			mshv_async_hvcall_handler(partition, &status);
-		} else { /* Paranoia check. This shouldn't happen! */
-			ret = -EBADFD;
-			goto free_pages_out;
+	reps_completed = 0;
+	do {
+		if (args.reps) {
+			status = hv_do_rep_hypercall_ex(args.code, args.reps,
+							0, reps_completed,
+							input_pg, output_pg);
+			reps_completed = hv_repcomp(status);
+		} else {
+			status = hv_do_hypercall(args.code, input_pg, output_pg);
 		}
-	}
 
-	if (hv_result(status) == HV_STATUS_INSUFFICIENT_MEMORY) {
-		ret = hv_call_deposit_pages(NUMA_NO_NODE, partition->pt_id, 1);
-		if (!ret)
-			ret = -EAGAIN;
-	} else if (!hv_result_success(status)) {
-		ret = hv_result_to_errno(status);
-	}
+		if (hv_result(status) == HV_STATUS_CALL_PENDING) {
+			if (is_async) {
+				mshv_async_hvcall_handler(partition, &status);
+			} else { /* Paranoia check. This shouldn't happen! */
+				ret = -EBADFD;
+				goto free_pages_out;
+			}
+		}
+
+		if (hv_result_success(status))
+			break;
+
+		if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY)
+			ret = hv_result_to_errno(status);
+		else
+			ret = hv_call_deposit_pages(NUMA_NO_NODE,
+						    partition->pt_id, 1);
+	} while (!ret);
 
-	/*
-	 * Always return the status and output data regardless of result.
-	 * The VMM may need it to determine how to proceed. E.g. the status may
-	 * contain the number of reps completed if a rep hypercall partially
-	 * succeeded.
-	 */
 	args.status = hv_result(status);
-	args.reps = args.reps ? hv_repcomp(status) : 0;
+	args.reps = reps_completed;
 	if (copy_to_user(user_args, &args, sizeof(args)))
 		ret = -EFAULT;
 
-	if (output_pg &&
+	if (!ret && output_pg &&
 	    copy_to_user((void __user *)args.out_ptr, output_pg, args.out_sz))
 		ret = -EFAULT;
 
diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
index ebf458dbcf84..ecedab554c80 100644
--- a/include/asm-generic/mshyperv.h
+++ b/include/asm-generic/mshyperv.h
@@ -126,10 +126,12 @@ static inline unsigned int hv_repcomp(u64 status)
 
 /*
  * Rep hypercalls. Callers of this functions are supposed to ensure that
- * rep_count and varhead_size comply with Hyper-V hypercall definition.
+ * rep_count, varhead_size, and rep_start comply with Hyper-V hypercall
+ * definition.
  */
-static inline u64 hv_do_rep_hypercall(u16 code, u16 rep_count, u16 varhead_size,
-				      void *input, void *output)
+static inline u64 hv_do_rep_hypercall_ex(u16 code, u16 rep_count,
+					 u16 varhead_size, u16 rep_start,
+					 void *input, void *output)
 {
 	u64 control = code;
 	u64 status;
@@ -137,6 +139,7 @@ static inline u64 hv_do_rep_hypercall(u16 code, u16 rep_count, u16 varhead_size,
 
 	control |= (u64)varhead_size << HV_HYPERCALL_VARHEAD_OFFSET;
 	control |= (u64)rep_count << HV_HYPERCALL_REP_COMP_OFFSET;
+	control |= (u64)rep_start << HV_HYPERCALL_REP_START_OFFSET;
 
 	do {
 		status = hv_do_hypercall(control, input, output);
@@ -154,6 +157,14 @@ static inline u64 hv_do_rep_hypercall(u16 code, u16 rep_count, u16 varhead_size,
 	return status;
 }
 
+/* For the typical case where rep_start is 0 */
+static inline u64 hv_do_rep_hypercall(u16 code, u16 rep_count, u16 varhead_size,
+				      void *input, void *output)
+{
+	return hv_do_rep_hypercall_ex(code, rep_count, varhead_size, 0,
+				      input, output);
+}
+
 /* Generate the guest OS identifier as described in the Hyper-V TLFS */
 static inline u64 hv_generate_guest_id(u64 kernel_version)
 {
--
2.34.1