lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <45e7a4c0-f1d8-b8b4-8c03-56d06845323b@linux.microsoft.com>
Date: Fri, 23 Jan 2026 18:19:15 -0800
From: Mukesh R <mrathor@...ux.microsoft.com>
To: Stanislav Kinsburskii <skinsburskii@...ux.microsoft.com>
Cc: linux-kernel@...r.kernel.org, linux-hyperv@...r.kernel.org,
 linux-arm-kernel@...ts.infradead.org, iommu@...ts.linux.dev,
 linux-pci@...r.kernel.org, linux-arch@...r.kernel.org, kys@...rosoft.com,
 haiyangz@...rosoft.com, wei.liu@...nel.org, decui@...rosoft.com,
 longli@...rosoft.com, catalin.marinas@....com, will@...nel.org,
 tglx@...utronix.de, mingo@...hat.com, bp@...en8.de,
 dave.hansen@...ux.intel.com, hpa@...or.com, joro@...tes.org,
 lpieralisi@...nel.org, kwilczynski@...nel.org, mani@...nel.org,
 robh@...nel.org, bhelgaas@...gle.com, arnd@...db.de,
 nunodasneves@...ux.microsoft.com, mhklinux@...look.com
Subject: Re: [PATCH v0 15/15] mshv: Populate mmio mappings for PCI passthru

On 1/20/26 17:53, Stanislav Kinsburskii wrote:
> On Mon, Jan 19, 2026 at 10:42:30PM -0800, Mukesh R wrote:
>> From: Mukesh Rathor <mrathor@...ux.microsoft.com>
>>
>> Upon guest access, in case of missing mmio mapping, the hypervisor
>> generates an unmapped gpa intercept. In this path, lookup the PCI
>> resource pfn for the guest gpa, and ask the hypervisor to map it
>> via hypercall. The PCI resource pfn is maintained by the VFIO driver,
>> and obtained via fixup_user_fault call (similar to KVM).
>>
>> Signed-off-by: Mukesh Rathor <mrathor@...ux.microsoft.com>
>> ---
>>   drivers/hv/mshv_root_main.c | 115 ++++++++++++++++++++++++++++++++++++
>>   1 file changed, 115 insertions(+)
>>
>> diff --git a/drivers/hv/mshv_root_main.c b/drivers/hv/mshv_root_main.c
>> index 03f3aa9f5541..4c8bc7cd0888 100644
>> --- a/drivers/hv/mshv_root_main.c
>> +++ b/drivers/hv/mshv_root_main.c
>> @@ -56,6 +56,14 @@ struct hv_stats_page {
>>   	};
>>   } __packed;
>>   
>> +bool hv_nofull_mmio;   /* don't map entire mmio region upon fault */
>> +static int __init setup_hv_full_mmio(char *str)
>> +{
>> +	hv_nofull_mmio = true;
>> +	return 0;
>> +}
>> +__setup("hv_nofull_mmio", setup_hv_full_mmio);
>> +
>>   struct mshv_root mshv_root;
>>   
>>   enum hv_scheduler_type hv_scheduler_type;
>> @@ -612,6 +620,109 @@ mshv_partition_region_by_gfn(struct mshv_partition *partition, u64 gfn)
>>   }
>>   
>>   #ifdef CONFIG_X86_64
>> +
>> +/*
>> + * Check if uaddr is for mmio range. If yes, return 0 with mmio_pfn filled in
>> + * else just return -errno.
>> + */
>> +static int mshv_chk_get_mmio_start_pfn(struct mshv_partition *pt, u64 gfn,
>> +				       u64 *mmio_pfnp)
>> +{
>> +	struct vm_area_struct *vma;
>> +	bool is_mmio;
>> +	u64 uaddr;
>> +	struct mshv_mem_region *mreg;
>> +	struct follow_pfnmap_args pfnmap_args;
>> +	int rc = -EINVAL;
>> +
>> +	/*
>> +	 * Do not allow mem region to be deleted beneath us. VFIO uses
>> +	 * useraddr vma to lookup pci bar pfn.
>> +	 */
>> +	spin_lock(&pt->pt_mem_regions_lock);
>> +
>> +	/* Get the region again under the lock */
>> +	mreg = mshv_partition_region_by_gfn(pt, gfn);
>> +	if (mreg == NULL || mreg->type != MSHV_REGION_TYPE_MMIO)
>> +		goto unlock_pt_out;
>> +
>> +	uaddr = mreg->start_uaddr +
>> +		((gfn - mreg->start_gfn) << HV_HYP_PAGE_SHIFT);
>> +
>> +	mmap_read_lock(current->mm);
> 
> Semaphore can't be taken under spinlock.
> Get it instead.

Yeah, something didn't feel right here and I meant to recheck, now regret
rushing to submit the patch.

Rethinking, I think the pt_mem_regions_lock is not needed to protect
the uaddr because unmap will properly serialize via the mm lock.


>> +	vma = vma_lookup(current->mm, uaddr);
>> +	is_mmio = vma ? !!(vma->vm_flags & (VM_IO | VM_PFNMAP)) : 0;
> 
> Why this check is needed again?

To make sure region did not change. This check is under lock.

> The region type is stored on the region itself.
> And the type is checked on the caller side.
> 
>> +	if (!is_mmio)
>> +		goto unlock_mmap_out;
>> +
>> +	pfnmap_args.vma = vma;
>> +	pfnmap_args.address = uaddr;
>> +
>> +	rc = follow_pfnmap_start(&pfnmap_args);
>> +	if (rc) {
>> +		rc = fixup_user_fault(current->mm, uaddr, FAULT_FLAG_WRITE,
>> +				      NULL);
>> +		if (rc)
>> +			goto unlock_mmap_out;
>> +
>> +		rc = follow_pfnmap_start(&pfnmap_args);
>> +		if (rc)
>> +			goto unlock_mmap_out;
>> +	}
>> +
>> +	*mmio_pfnp = pfnmap_args.pfn;
>> +	follow_pfnmap_end(&pfnmap_args);
>> +
>> +unlock_mmap_out:
>> +	mmap_read_unlock(current->mm);
>> +unlock_pt_out:
>> +	spin_unlock(&pt->pt_mem_regions_lock);
>> +	return rc;
>> +}
>> +
>> +/*
>> + * At present, the only unmapped gpa is mmio space. Verify if it's mmio
>> + * and resolve if possible.
>> + * Returns: True if valid mmio intercept and it was handled, else false
>> + */
>> +static bool mshv_handle_unmapped_gpa(struct mshv_vp *vp)
>> +{
>> +	struct hv_message *hvmsg = vp->vp_intercept_msg_page;
>> +	struct hv_x64_memory_intercept_message *msg;
>> +	union hv_x64_memory_access_info accinfo;
>> +	u64 gfn, mmio_spa, numpgs;
>> +	struct mshv_mem_region *mreg;
>> +	int rc;
>> +	struct mshv_partition *pt = vp->vp_partition;
>> +
>> +	msg = (struct hv_x64_memory_intercept_message *)hvmsg->u.payload;
>> +	accinfo = msg->memory_access_info;
>> +
>> +	if (!accinfo.gva_gpa_valid)
>> +		return false;
>> +
>> +	/* Do a fast check and bail if non mmio intercept */
>> +	gfn = msg->guest_physical_address >> HV_HYP_PAGE_SHIFT;
>> +	mreg = mshv_partition_region_by_gfn(pt, gfn);
> 
> This call needs to be protected by the spinlock.

This is sorta fast path to bail. We recheck under partition lock above.

Thanks,
-Mukesh


> Thanks,
> Stanislav
> 
>> +	if (mreg == NULL || mreg->type != MSHV_REGION_TYPE_MMIO)
>> +		return false;
>> +
>> +	rc = mshv_chk_get_mmio_start_pfn(pt, gfn, &mmio_spa);
>> +	if (rc)
>> +		return false;
>> +
>> +	if (!hv_nofull_mmio) {		/* default case */
>> +		gfn = mreg->start_gfn;
>> +		mmio_spa = mmio_spa - (gfn - mreg->start_gfn);
>> +		numpgs = mreg->nr_pages;
>> +	} else
>> +		numpgs = 1;
>> +
>> +	rc = hv_call_map_mmio_pages(pt->pt_id, gfn, mmio_spa, numpgs);
>> +
>> +	return rc == 0;
>> +}
>> +
>>   static struct mshv_mem_region *
>>   mshv_partition_region_by_gfn_get(struct mshv_partition *p, u64 gfn)
>>   {
>> @@ -666,13 +777,17 @@ static bool mshv_handle_gpa_intercept(struct mshv_vp *vp)
>>   
>>   	return ret;
>>   }
>> +
>>   #else  /* CONFIG_X86_64 */
>> +static bool mshv_handle_unmapped_gpa(struct mshv_vp *vp) { return false; }
>>   static bool mshv_handle_gpa_intercept(struct mshv_vp *vp) { return false; }
>>   #endif /* CONFIG_X86_64 */
>>   
>>   static bool mshv_vp_handle_intercept(struct mshv_vp *vp)
>>   {
>>   	switch (vp->vp_intercept_msg_page->header.message_type) {
>> +	case HVMSG_UNMAPPED_GPA:
>> +		return mshv_handle_unmapped_gpa(vp);
>>   	case HVMSG_GPA_INTERCEPT:
>>   		return mshv_handle_gpa_intercept(vp);
>>   	}
>> -- 
>> 2.51.2.vfs.0.1
>>


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ