lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID:
 <SN6PR02MB415718474E9A7DEA70D27E00D4E7A@SN6PR02MB4157.namprd02.prod.outlook.com>
Date: Thu, 2 Oct 2025 00:16:21 +0000
From: Michael Kelley <mhklinux@...look.com>
To: Nuno Das Neves <nunodasneves@...ux.microsoft.com>,
	"linux-hyperv@...r.kernel.org" <linux-hyperv@...r.kernel.org>,
	"linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
	"prapal@...ux.microsoft.com" <prapal@...ux.microsoft.com>,
	"easwar.hariharan@...ux.microsoft.com"
	<easwar.hariharan@...ux.microsoft.com>, "tiala@...rosoft.com"
	<tiala@...rosoft.com>, "anirudh@...rudhrb.com" <anirudh@...rudhrb.com>,
	"paekkaladevi@...ux.microsoft.com" <paekkaladevi@...ux.microsoft.com>,
	"skinsburskii@...ux.microsoft.com" <skinsburskii@...ux.microsoft.com>
CC: "kys@...rosoft.com" <kys@...rosoft.com>, "haiyangz@...rosoft.com"
	<haiyangz@...rosoft.com>, "wei.liu@...nel.org" <wei.liu@...nel.org>,
	"decui@...rosoft.com" <decui@...rosoft.com>, Jinank Jain
	<jinankjain@...ux.microsoft.com>
Subject: RE: [PATCH v4 5/5] mshv: Introduce new hypercall to map stats page
 for L1VH partitions

From: Nuno Das Neves <nunodasneves@...ux.microsoft.com> Sent: Wednesday, October 1, 2025 4:03 PM
> 
> On 9/29/2025 10:57 AM, Michael Kelley wrote:
> > From: Nuno Das Neves <nunodasneves@...ux.microsoft.com> Sent: Friday, September 26, 2025 9:23 AM
> >>
> >> From: Jinank Jain <jinankjain@...ux.microsoft.com>
> >>

[snip]

> >> diff --git a/drivers/hv/mshv_root_hv_call.c b/drivers/hv/mshv_root_hv_call.c
> >> index 98c6278ff151..5a805b3dec0b 100644
> >> --- a/drivers/hv/mshv_root_hv_call.c
> >> +++ b/drivers/hv/mshv_root_hv_call.c
> >> @@ -804,9 +804,51 @@ hv_call_notify_port_ring_empty(u32 sint_index)
> >>  	return hv_result_to_errno(status);
> >>  }
> >>
> >> -int hv_call_map_stat_page(enum hv_stats_object_type type,
> >> -			  const union hv_stats_object_identity *identity,
> >> -			  void **addr)
> >> +static int hv_call_map_stats_page2(enum hv_stats_object_type type,
> >> +				   const union hv_stats_object_identity *identity,
> >> +				   u64 map_location)
> >> +{
> >> +	unsigned long flags;
> >> +	struct hv_input_map_stats_page2 *input;
> >> +	u64 status;
> >> +	int ret;
> >> +
> >> +	if (!map_location || !mshv_use_overlay_gpfn())
> >> +		return -EINVAL;
> >> +
> >> +	do {
> >> +		local_irq_save(flags);
> >> +		input = *this_cpu_ptr(hyperv_pcpu_input_arg);
> >> +
> >> +		memset(input, 0, sizeof(*input));
> >> +		input->type = type;
> >> +		input->identity = *identity;
> >> +		input->map_location = map_location;
> >> +
> >> +		status = hv_do_hypercall(HVCALL_MAP_STATS_PAGE2, input, NULL);
> >> +
> >> +		local_irq_restore(flags);
> >> +
> >> +		ret = hv_result_to_errno(status);
> >> +
> >> +		if (!ret)
> >> +			break;
> >
> > This logic is incorrect. If the hypercall returns
> > HV_STATUS_INSUFFICIENT_MEMORY, then errno -ENOMEM is immediately
> > returned to the caller without any opportunity to do hv_call_deposit_pages().
> >
> The loop breaks if (!ret), i.e. on success. Maybe you misread it as `if (ret)`?

Pfft!  Indeed I had it wrong.

I'll note that hv_call_map_stats_page2() and the existing hv_call_map_stats_page()
have different code for sorting out success vs. insufficient memory vs. other errors.
And there are additional code variations in other MSHV places that must check for
insufficient memory. Some consistency might be nice, but that's out of scope
for this patch set.

> >> +
> >> +		if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) {
> >> +			hv_status_debug(status, "\n");
> >> +			break;
> >> +		}
> >> +
> >> +		ret = hv_call_deposit_pages(NUMA_NO_NODE,
> >> +					    hv_current_partition_id, 1);
> >> +	} while (!ret);
> >> +
> >> +	return ret;
> >> +}
> >> +
> >> +static int hv_call_map_stats_page(enum hv_stats_object_type type,
> >> +				  const union hv_stats_object_identity *identity,
> >> +				  void **addr)
> >>  {
> >>  	unsigned long flags;
> >>  	struct hv_input_map_stats_page *input;
> >> @@ -845,8 +887,36 @@ int hv_call_map_stat_page(enum hv_stats_object_type type,
> >>  	return ret;
> >>  }
> >>
> >> -int hv_call_unmap_stat_page(enum hv_stats_object_type type,
> >> -			    const union hv_stats_object_identity *identity)
> >> +int hv_map_stats_page(enum hv_stats_object_type type,
> >> +		      const union hv_stats_object_identity *identity,
> >> +		      void **addr)
> >> +{
> >> +	int ret;
> >> +	struct page *allocated_page = NULL;
> >> +
> >> +	if (!addr)
> >> +		return -EINVAL;
> >> +
> >> +	if (mshv_use_overlay_gpfn()) {
> >> +		allocated_page = alloc_page(GFP_KERNEL);
> >> +		if (!allocated_page)
> >> +			return -ENOMEM;
> >> +
> >> +		ret = hv_call_map_stats_page2(type, identity,
> >> +					      page_to_pfn(allocated_page));
> >
> > This should use page_to_hvpfn() per my comments in Patch 4 of this series.
> >
> I could change it, but as mentioned in my reply to Patch 4 I'm not sure it's
> worth doing without addressing the whole issue which is a much bigger ask.

Agreed.

Michael

> 
> >> +		*addr = page_address(allocated_page);
> >> +	} else {
> >> +		ret = hv_call_map_stats_page(type, identity, addr);
> >> +	}
> >> +
> >> +	if (ret && allocated_page)
> >> +		__free_page(allocated_page);
> >
> > Might want to do *addr = NULL after freeing the page so that the caller
> > can't erroneously reference the free page. Again, the current caller doesn't
> > do that, so current code isn't broken.
> >
> Yep, I can add it to provide a little more robustness against bugs in callers.
> 
> >> +
> >> +	return ret;
> >> +}
> >> +
> >> +static int hv_call_unmap_stats_page(enum hv_stats_object_type type,
> >> +				    const union hv_stats_object_identity *identity)
> >>  {
> >>  	unsigned long flags;
> >>  	struct hv_input_unmap_stats_page *input;
> >> @@ -865,6 +935,19 @@ int hv_call_unmap_stat_page(enum hv_stats_object_type
> type,
> >>  	return hv_result_to_errno(status);
> >>  }
> >>
> >> +int hv_unmap_stats_page(enum hv_stats_object_type type, void *page_addr,
> >> +			const union hv_stats_object_identity *identity)
> >> +{
> >> +	int ret;
> >> +
> >> +	ret = hv_call_unmap_stats_page(type, identity);
> >> +
> >> +	if (mshv_use_overlay_gpfn() && page_addr)
> >> +		__free_page(virt_to_page(page_addr));
> >> +
> >> +	return ret;
> >> +}
> >> +
> >>  int hv_call_modify_spa_host_access(u64 partition_id, struct page **pages,
> >>  				   u64 page_struct_count, u32 host_access,
> >>  				   u32 flags, u8 acquire)
> >> diff --git a/drivers/hv/mshv_root_main.c b/drivers/hv/mshv_root_main.c
> >> index 2d0ad17acde6..71a8ab5db3b8 100644
> >> --- a/drivers/hv/mshv_root_main.c
> >> +++ b/drivers/hv/mshv_root_main.c
> >> @@ -841,7 +841,8 @@ mshv_vp_release(struct inode *inode, struct file *filp)
> >>  	return 0;
> >>  }
> >>
> >> -static void mshv_vp_stats_unmap(u64 partition_id, u32 vp_index)
> >> +static void mshv_vp_stats_unmap(u64 partition_id, u32 vp_index,
> >> +				void *stats_pages[])
> >>  {
> >>  	union hv_stats_object_identity identity = {
> >>  		.vp.partition_id = partition_id,
> >> @@ -849,10 +850,10 @@ static void mshv_vp_stats_unmap(u64 partition_id, u32
> vp_index)
> >>  	};
> >>
> >>  	identity.vp.stats_area_type = HV_STATS_AREA_SELF;
> >> -	hv_call_unmap_stat_page(HV_STATS_OBJECT_VP, &identity);
> >> +	hv_unmap_stats_page(HV_STATS_OBJECT_VP, NULL, &identity);
> >>
> >>  	identity.vp.stats_area_type = HV_STATS_AREA_PARENT;
> >> -	hv_call_unmap_stat_page(HV_STATS_OBJECT_VP, &identity);
> >> +	hv_unmap_stats_page(HV_STATS_OBJECT_VP, NULL, &identity);
> >>  }
> >>
> >>  static int mshv_vp_stats_map(u64 partition_id, u32 vp_index,
> >> @@ -865,14 +866,14 @@ static int mshv_vp_stats_map(u64 partition_id, u32
> vp_index,
> >>  	int err;
> >>
> >>  	identity.vp.stats_area_type = HV_STATS_AREA_SELF;
> >> -	err = hv_call_map_stat_page(HV_STATS_OBJECT_VP, &identity,
> >> -				    &stats_pages[HV_STATS_AREA_SELF]);
> >> +	err = hv_map_stats_page(HV_STATS_OBJECT_VP, &identity,
> >> +				&stats_pages[HV_STATS_AREA_SELF]);
> >>  	if (err)
> >>  		return err;
> >>
> >>  	identity.vp.stats_area_type = HV_STATS_AREA_PARENT;
> >> -	err = hv_call_map_stat_page(HV_STATS_OBJECT_VP, &identity,
> >> -				    &stats_pages[HV_STATS_AREA_PARENT]);
> >> +	err = hv_map_stats_page(HV_STATS_OBJECT_VP, &identity,
> >> +				&stats_pages[HV_STATS_AREA_PARENT]);
> >>  	if (err)
> >>  		goto unmap_self;
> >>
> >> @@ -880,7 +881,7 @@ static int mshv_vp_stats_map(u64 partition_id, u32
> vp_index,
> >>
> >>  unmap_self:
> >>  	identity.vp.stats_area_type = HV_STATS_AREA_SELF;
> >> -	hv_call_unmap_stat_page(HV_STATS_OBJECT_VP, &identity);
> >> +	hv_unmap_stats_page(HV_STATS_OBJECT_VP, NULL, &identity);
> >>  	return err;
> >>  }
> >>
> >> @@ -988,7 +989,7 @@ mshv_partition_ioctl_create_vp(struct mshv_partition
> *partition,
> >>  	kfree(vp);
> >>  unmap_stats_pages:
> >>  	if (hv_scheduler_type == HV_SCHEDULER_TYPE_ROOT)
> >> -		mshv_vp_stats_unmap(partition->pt_id, args.vp_index);
> >> +		mshv_vp_stats_unmap(partition->pt_id, args.vp_index, stats_pages);
> >>  unmap_ghcb_page:
> >>  	if (mshv_partition_encrypted(partition) && is_ghcb_mapping_available())
> >>  		hv_unmap_vp_state_page(partition->pt_id, args.vp_index,
> >> @@ -1740,7 +1741,8 @@ static void destroy_partition(struct mshv_partition
> *partition)
> >>  				continue;
> >>
> >>  			if (hv_scheduler_type == HV_SCHEDULER_TYPE_ROOT)
> >> -				mshv_vp_stats_unmap(partition->pt_id, vp-
> >vp_index);
> >> +				mshv_vp_stats_unmap(partition->pt_id, vp->vp_index,
> >> +						    (void **)vp->vp_stats_pages);
> >>
> >>  			if (vp->vp_register_page) {
> >>  				(void)hv_unmap_vp_state_page(partition->pt_id,
> >> diff --git a/include/hyperv/hvgdk_mini.h b/include/hyperv/hvgdk_mini.h
> >> index ff4325fb623a..f66565106d21 100644
> >> --- a/include/hyperv/hvgdk_mini.h
> >> +++ b/include/hyperv/hvgdk_mini.h
> >> @@ -493,6 +493,7 @@ union hv_vp_assist_msr_contents {	 /*
> >> HV_REGISTER_VP_ASSIST_PAGE */
> >>  #define HVCALL_GET_PARTITION_PROPERTY_EX		0x0101
> >>  #define HVCALL_MMIO_READ				0x0106
> >>  #define HVCALL_MMIO_WRITE				0x0107
> >> +#define HVCALL_MAP_STATS_PAGE2				0x0131
> >>
> >>  /* HV_HYPERCALL_INPUT */
> >>  #define HV_HYPERCALL_RESULT_MASK	GENMASK_ULL(15, 0)
> >> diff --git a/include/hyperv/hvhdk_mini.h b/include/hyperv/hvhdk_mini.h
> >> index bf2ce27dfcc5..064bf735cab6 100644
> >> --- a/include/hyperv/hvhdk_mini.h
> >> +++ b/include/hyperv/hvhdk_mini.h
> >> @@ -177,6 +177,13 @@ struct hv_input_map_stats_page {
> >>  	union hv_stats_object_identity identity;
> >>  } __packed;
> >>
> >> +struct hv_input_map_stats_page2 {
> >> +	u32 type; /* enum hv_stats_object_type */
> >> +	u32 padding;
> >> +	union hv_stats_object_identity identity;
> >> +	u64 map_location;
> >> +} __packed;
> >> +
> >>  struct hv_output_map_stats_page {
> >>  	u64 map_location;
> >>  } __packed;
> >> --
> >> 2.34.1
> >>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ