lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <148ab9a6-4698-46ef-a137-0bb0c110a137@linux.microsoft.com>
Date: Wed, 3 Dec 2025 11:26:23 -0800
From: Nuno Das Neves <nunodasneves@...ux.microsoft.com>
To: Stanislav Kinsburskii <skinsburskii@...ux.microsoft.com>,
 kys@...rosoft.com, haiyangz@...rosoft.com, wei.liu@...nel.org,
 decui@...rosoft.com
Cc: linux-hyperv@...r.kernel.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH v8 5/6] Drivers: hv: Add refcount and locking to mem
 regions

On 12/3/2025 10:24 AM, Stanislav Kinsburskii wrote:
> Introduce kref-based reference counting and spinlock protection for
> memory regions in Hyper-V partition management. This change improves
> memory region lifecycle management and ensures thread-safe access to the
> region list.
> 
> Also improves the check for overlapped memory regions during region
> creation, preventing duplicate or conflicting mappings.
> 
> Previously, the regions list was protected by the partition mutex.
> However, this approach is too heavy for frequent fault and invalidation
> operations. Finer grained locking is now used to improve efficiency and
> concurrency.
> 
> This is a precursor to supporting movable memory regions. Fault and
> invalidation handling for movable regions will require safe traversal of
> the region list and holding a region reference while performing
> invalidation or fault operations.
> 
> Signed-off-by: Stanislav Kinsburskii <skinsburskii@...ux.microsoft.com>
> ---
>  drivers/hv/mshv_regions.c   |   19 ++++++++++++++++---
>  drivers/hv/mshv_root.h      |    6 +++++-
>  drivers/hv/mshv_root_main.c |   32 ++++++++++++++++++++++++--------
>  3 files changed, 45 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/hv/mshv_regions.c b/drivers/hv/mshv_regions.c
> index 1356f68ccb29..94f33754f545 100644
> --- a/drivers/hv/mshv_regions.c
> +++ b/drivers/hv/mshv_regions.c
> @@ -7,6 +7,7 @@
>   * Authors: Microsoft Linux virtualization team
>   */
>  
> +#include <linux/kref.h>
>  #include <linux/mm.h>
>  #include <linux/vmalloc.h>
>  
> @@ -154,6 +155,8 @@ struct mshv_mem_region *mshv_region_create(u64 guest_pfn, u64 nr_pages,
>  	if (!is_mmio)
>  		region->flags.range_pinned = true;
>  
> +	kref_init(&region->refcount);
> +
>  	return region;
>  }
>  
> @@ -303,13 +306,13 @@ static int mshv_region_unmap(struct mshv_mem_region *region)
>  					 mshv_region_chunk_unmap);
>  }
>  
> -void mshv_region_destroy(struct mshv_mem_region *region)
> +static void mshv_region_destroy(struct kref *ref)
>  {
> +	struct mshv_mem_region *region =
> +		container_of(ref, struct mshv_mem_region, refcount);
>  	struct mshv_partition *partition = region->partition;
>  	int ret;
>  
> -	hlist_del(&region->hnode);
> -
>  	if (mshv_partition_encrypted(partition)) {
>  		ret = mshv_region_share(region);
>  		if (ret) {
> @@ -326,3 +329,13 @@ void mshv_region_destroy(struct mshv_mem_region *region)
>  
>  	vfree(region);
>  }
> +
> +void mshv_region_put(struct mshv_mem_region *region)
> +{
> +	kref_put(&region->refcount, mshv_region_destroy);
> +}
> +
> +int mshv_region_get(struct mshv_mem_region *region)
> +{
> +	return kref_get_unless_zero(&region->refcount);
> +}
> diff --git a/drivers/hv/mshv_root.h b/drivers/hv/mshv_root.h
> index ff3374f13691..4249534ba900 100644
> --- a/drivers/hv/mshv_root.h
> +++ b/drivers/hv/mshv_root.h
> @@ -72,6 +72,7 @@ do { \
>  
>  struct mshv_mem_region {
>  	struct hlist_node hnode;
> +	struct kref refcount;
>  	u64 nr_pages;
>  	u64 start_gfn;
>  	u64 start_uaddr;
> @@ -97,6 +98,8 @@ struct mshv_partition {
>  	u64 pt_id;
>  	refcount_t pt_ref_count;
>  	struct mutex pt_mutex;
> +
> +	spinlock_t pt_mem_regions_lock;
>  	struct hlist_head pt_mem_regions; // not ordered
>  
>  	u32 pt_vp_count;
> @@ -319,6 +322,7 @@ int mshv_region_unshare(struct mshv_mem_region *region);
>  int mshv_region_map(struct mshv_mem_region *region);
>  void mshv_region_invalidate(struct mshv_mem_region *region);
>  int mshv_region_pin(struct mshv_mem_region *region);
> -void mshv_region_destroy(struct mshv_mem_region *region);
> +void mshv_region_put(struct mshv_mem_region *region);
> +int mshv_region_get(struct mshv_mem_region *region);
>  
>  #endif /* _MSHV_ROOT_H_ */
> diff --git a/drivers/hv/mshv_root_main.c b/drivers/hv/mshv_root_main.c
> index 5dfb933da981..aa1a11f4dc3e 100644
> --- a/drivers/hv/mshv_root_main.c
> +++ b/drivers/hv/mshv_root_main.c
> @@ -1086,13 +1086,15 @@ static int mshv_partition_create_region(struct mshv_partition *partition,
>  	u64 nr_pages = HVPFN_DOWN(mem->size);
>  
>  	/* Reject overlapping regions */
> +	spin_lock(&partition->pt_mem_regions_lock);
>  	hlist_for_each_entry(rg, &partition->pt_mem_regions, hnode) {
>  		if (mem->guest_pfn + nr_pages <= rg->start_gfn ||
>  		    rg->start_gfn + rg->nr_pages <= mem->guest_pfn)
>  			continue;
> -
> +		spin_unlock(&partition->pt_mem_regions_lock);
>  		return -EEXIST;
>  	}
> +	spin_unlock(&partition->pt_mem_regions_lock);
>  
>  	rg = mshv_region_create(mem->guest_pfn, nr_pages,
>  				mem->userspace_addr, mem->flags,
> @@ -1224,8 +1226,9 @@ mshv_map_user_memory(struct mshv_partition *partition,
>  	if (ret)
>  		goto errout;
>  
> -	/* Install the new region */
> +	spin_lock(&partition->pt_mem_regions_lock);
>  	hlist_add_head(&region->hnode, &partition->pt_mem_regions);
> +	spin_unlock(&partition->pt_mem_regions_lock);
>  
>  	return 0;
>  
> @@ -1244,17 +1247,27 @@ mshv_unmap_user_memory(struct mshv_partition *partition,
>  	if (!(mem.flags & BIT(MSHV_SET_MEM_BIT_UNMAP)))
>  		return -EINVAL;
>  
> +	spin_lock(&partition->pt_mem_regions_lock);
> +
>  	region = mshv_partition_region_by_gfn(partition, mem.guest_pfn);
> -	if (!region)
> -		return -EINVAL;
> +	if (!region) {
> +		spin_unlock(&partition->pt_mem_regions_lock);
> +		return -ENOENT;
> +	}
>  
>  	/* Paranoia check */
>  	if (region->start_uaddr != mem.userspace_addr ||
>  	    region->start_gfn != mem.guest_pfn ||
> -	    region->nr_pages != HVPFN_DOWN(mem.size))
> +	    region->nr_pages != HVPFN_DOWN(mem.size)) {
> +		spin_unlock(&partition->pt_mem_regions_lock);
>  		return -EINVAL;
> +	}
> +
> +	hlist_del(&region->hnode);
>  
> -	mshv_region_destroy(region);
> +	spin_unlock(&partition->pt_mem_regions_lock);
> +
> +	mshv_region_put(region);
>  
>  	return 0;
>  }
> @@ -1657,8 +1670,10 @@ static void destroy_partition(struct mshv_partition *partition)
>  	remove_partition(partition);
>  
>  	hlist_for_each_entry_safe(region, n, &partition->pt_mem_regions,
> -				  hnode)
> -		mshv_region_destroy(region);
> +				  hnode) {
> +		hlist_del(&region->hnode);
> +		mshv_region_put(region);
> +	}
>  

With the following patch introducing movable memory, it looks like the
list could be traversed by mshv_partition_region_by_gfn() even while
this hlist_del() is being called.

Maybe that's not possible for some reason I'm unaware of — could you
explain why we don't need to take the spinlock here around hlist_del()?
Or, alternatively, use hlist_for_each_entry_safe() in
mshv_partition_region_by_gfn() to guard against the deletion?

>  	/* Withdraw and free all pages we deposited */
>  	hv_call_withdraw_memory(U64_MAX, NUMA_NO_NODE, partition->pt_id);
> @@ -1856,6 +1871,7 @@ mshv_ioctl_create_partition(void __user *user_arg, struct device *module_dev)
>  
>  	INIT_HLIST_HEAD(&partition->pt_devices);
>  
> +	spin_lock_init(&partition->pt_mem_regions_lock);
>  	INIT_HLIST_HEAD(&partition->pt_mem_regions);
>  
>  	mshv_eventfd_init(partition);
> 
> 


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ