lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID:
 <SN6PR02MB4157ADD06608EFE00B86A3F7D451A@SN6PR02MB4157.namprd02.prod.outlook.com>
Date: Thu, 17 Jul 2025 17:25:54 +0000
From: Michael Kelley <mhklinux@...look.com>
To: Dan Williams <dan.j.williams@...el.com>, "bhelgaas@...gle.com"
	<bhelgaas@...gle.com>
CC: "linux-pci@...r.kernel.org" <linux-pci@...r.kernel.org>, "lukas@...ner.de"
	<lukas@...ner.de>, "linux-kernel@...r.kernel.org"
	<linux-kernel@...r.kernel.org>, "Jonathan.Cameron@...wei.com"
	<Jonathan.Cameron@...wei.com>, Suzuki K Poulose <suzuki.poulose@....com>,
	Lorenzo Pieralisi <lpieralisi@...nel.org>, Manivannan Sadhasivam
	<manivannan.sadhasivam@...aro.org>, Rob Herring <robh@...nel.org>, "K. Y.
 Srinivasan" <kys@...rosoft.com>, Haiyang Zhang <haiyangz@...rosoft.com>, Wei
 Liu <wei.liu@...nel.org>, Dexuan Cui <decui@...rosoft.com>, "open
 list:Hyper-V/Azure CORE AND DRIVERS" <linux-hyperv@...r.kernel.org>
Subject: RE: [PATCH 2/3] PCI: Enable host bridge emulation for
 PCI_DOMAINS_GENERIC platforms

From: Dan Williams <dan.j.williams@...el.com> Sent: Wednesday, July 16, 2025 9:09 AM
> 
> The ability to emulate a host bridge is useful not only for hardware PCI
> controllers like CONFIG_VMD, or virtual PCI controllers like
> CONFIG_PCI_HYPERV, but also for test and development scenarios like
> CONFIG_SAMPLES_DEVSEC [1].
> 
> One stumbling block for defining CONFIG_SAMPLES_DEVSEC, a sample
> implementation of a platform TSM for PCI Device Security, is the need to
> accommodate PCI_DOMAINS_GENERIC architectures alongside x86 [2].
> 
> In support of supplementing the existing CONFIG_PCI_BRIDGE_EMUL
> infrastructure for host bridges:
> 
> * Introduce pci_bus_find_emul_domain_nr() as a common way to find a free
>   PCI domain number whether that is to reuse the existing dynamic
>   allocation code in the !ACPI case, or to assign an unused domain above
>   the last ACPI segment.
> 
> * Convert pci-hyperv to the new allocator so that the PCI core can
>   unconditionally assume that bridge->domain_nr != PCI_DOMAIN_NR_NOT_SET
>   is the dynamically allocated case.
> 
> A follow on patch can also convert vmd to the new scheme. Currently vmd
> is limited to CONFIG_PCI_DOMAINS_GENERIC=n (x86) so, unlike pci-hyperv,
> it does not immediately conflict with this new
> pci_bus_find_emul_domain_nr() mechanism.
> 
> Link: https://lore.kernel.org/all/174107249038.1288555.12362100502109498455.stgit@dwillia2-xfh.jf.intel.com/ [1]
> Reported-by: Suzuki K Poulose <suzuki.poulose@....com>
> Closes: https://lore.kernel.org/all/20250311144601.145736-3-suzuki.poulose@arm.com/ 
> Cc: Lorenzo Pieralisi <lpieralisi@...nel.org>
> Cc: Manivannan Sadhasivam <manivannan.sadhasivam@...aro.org>
> Cc: Rob Herring <robh@...nel.org>
> Cc: Bjorn Helgaas <bhelgaas@...gle.com>
> Cc: "K. Y. Srinivasan" <kys@...rosoft.com>
> Cc: Haiyang Zhang <haiyangz@...rosoft.com>
> Cc: Wei Liu <wei.liu@...nel.org>
> Cc: Dexuan Cui <decui@...rosoft.com>
> Tested-by: Suzuki K Poulose <suzuki.poulose@....com>
> Signed-off-by: Dan Williams <dan.j.williams@...el.com>
> ---
>  drivers/pci/controller/pci-hyperv.c | 53 ++---------------------------
>  drivers/pci/pci.c                   | 43 ++++++++++++++++++++++-
>  drivers/pci/probe.c                 |  8 ++++-
>  include/linux/pci.h                 |  4 +++
>  4 files changed, 56 insertions(+), 52 deletions(-)
> 
> diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c
> index ef5d655a0052..cfe9806bdbe4 100644
> --- a/drivers/pci/controller/pci-hyperv.c
> +++ b/drivers/pci/controller/pci-hyperv.c
> @@ -3630,48 +3630,6 @@ static int hv_send_resources_released(struct hv_device *hdev)
>  	return 0;
>  }
> 
> -#define HVPCI_DOM_MAP_SIZE (64 * 1024)
> -static DECLARE_BITMAP(hvpci_dom_map, HVPCI_DOM_MAP_SIZE);
> -
> -/*
> - * PCI domain number 0 is used by emulated devices on Gen1 VMs, so define 0
> - * as invalid for passthrough PCI devices of this driver.
> - */
> -#define HVPCI_DOM_INVALID 0
> -
> -/**
> - * hv_get_dom_num() - Get a valid PCI domain number
> - * Check if the PCI domain number is in use, and return another number if
> - * it is in use.
> - *
> - * @dom: Requested domain number
> - *
> - * return: domain number on success, HVPCI_DOM_INVALID on failure
> - */
> -static u16 hv_get_dom_num(u16 dom)
> -{
> -	unsigned int i;
> -
> -	if (test_and_set_bit(dom, hvpci_dom_map) == 0)
> -		return dom;
> -
> -	for_each_clear_bit(i, hvpci_dom_map, HVPCI_DOM_MAP_SIZE) {
> -		if (test_and_set_bit(i, hvpci_dom_map) == 0)
> -			return i;
> -	}
> -
> -	return HVPCI_DOM_INVALID;
> -}
> -
> -/**
> - * hv_put_dom_num() - Mark the PCI domain number as free
> - * @dom: Domain number to be freed
> - */
> -static void hv_put_dom_num(u16 dom)
> -{
> -	clear_bit(dom, hvpci_dom_map);
> -}
> -
>  /**
>   * hv_pci_probe() - New VMBus channel probe, for a root PCI bus
>   * @hdev:	VMBus's tracking struct for this root PCI bus
> @@ -3715,9 +3673,9 @@ static int hv_pci_probe(struct hv_device *hdev,
>  	 * collisions) in the same VM.
>  	 */
>  	dom_req = hdev->dev_instance.b[5] << 8 | hdev->dev_instance.b[4];
> -	dom = hv_get_dom_num(dom_req);
> +	dom = pci_bus_find_emul_domain_nr(dom_req);
> 
> -	if (dom == HVPCI_DOM_INVALID) {
> +	if (dom < 0) {
>  		dev_err(&hdev->device,
>  			"Unable to use dom# 0x%x or other numbers", dom_req);
>  		ret = -EINVAL;
> @@ -3851,7 +3809,7 @@ static int hv_pci_probe(struct hv_device *hdev,
>  destroy_wq:
>  	destroy_workqueue(hbus->wq);
>  free_dom:
> -	hv_put_dom_num(hbus->bridge->domain_nr);
> +	pci_bus_release_emul_domain_nr(hbus->bridge->domain_nr);
>  free_bus:
>  	kfree(hbus);
>  	return ret;
> @@ -3976,8 +3934,6 @@ static void hv_pci_remove(struct hv_device *hdev)
>  	irq_domain_remove(hbus->irq_domain);
>  	irq_domain_free_fwnode(hbus->fwnode);
> 
> -	hv_put_dom_num(hbus->bridge->domain_nr);
> -
>  	kfree(hbus);
>  }
> 
> @@ -4148,9 +4104,6 @@ static int __init init_hv_pci_drv(void)
>  	if (ret)
>  		return ret;
> 
> -	/* Set the invalid domain number's bit, so it will not be used */
> -	set_bit(HVPCI_DOM_INVALID, hvpci_dom_map);
> -
>  	/* Initialize PCI block r/w interface */
>  	hvpci_block_ops.read_block = hv_read_config_block;
>  	hvpci_block_ops.write_block = hv_write_config_block;
> diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
> index e9448d55113b..833ebf2d5213 100644
> --- a/drivers/pci/pci.c
> +++ b/drivers/pci/pci.c
> @@ -6692,9 +6692,50 @@ static void pci_no_domains(void)
>  #endif
>  }
> 
> +#ifdef CONFIG_PCI_DOMAINS
> +static DEFINE_IDA(pci_domain_nr_dynamic_ida);
> +
> +/*
> + * Find a free domain_nr either allocated by pci_domain_nr_dynamic_ida or
> + * fallback to the first free domain number above the last ACPI segment number.
> + * Caller may have a specific domain number in mind, in which case try to
> + * reserve it.
> + *
> + * Note that this allocation is freed by pci_release_host_bridge_dev().
> + */
> +int pci_bus_find_emul_domain_nr(int hint)
> +{
> +	if (hint >= 0) {
> +		hint = ida_alloc_range(&pci_domain_nr_dynamic_ida, hint, hint,
> +				       GFP_KERNEL);

This almost preserves the existing functionality in pci-hyperv.c. But if the
"hint" passed in is zero, current code in pci-hyperv.c treats that as a
collision and allocates some other value. The special treatment of zero is
necessary per the comment with the definition of HVPCI_DOM_INVALID.

I don't have an opinion on whether the code here should treat a "hint"
of zero as invalid, or whether that should be handled in pci-hyperv.c.

> +
> +		if (hint >= 0)
> +			return hint;
> +	}
> +
> +	if (acpi_disabled)
> +		return ida_alloc(&pci_domain_nr_dynamic_ida, GFP_KERNEL);
> +
> +	/*
> +	 * Emulated domains start at 0x10000 to not clash with ACPI _SEG
> +	 * domains.  Per ACPI r6.0, sec 6.5.6,  _SEG returns an integer, of
> +	 * which the lower 16 bits are the PCI Segment Group (domain) number.
> +	 * Other bits are currently reserved.
> +	 */

Back in 2018 and 2019, the Microsoft Linux team encountered problems with
PCI domain IDs that exceeded 0xFFFF. User space code, such as the Xorg X server,
assumed PCI domain IDs were at most 16 bits, and retained only the low 16 bits
if the value was larger. My memory of the details is vague, but I believe some
or all of this behavior was tied to libpciaccess. As a result of these user space
limitations, the pci-hyperv.c code made sure to not create any domain IDs
larger than 0xFFFF. The problem was not just theoretical -- Microsoft had
customers reporting issues due to the "32-bit domain ID problem" and the
pci-hyperv.c code was updated to avoid it.

I don't have information on whether user space code has been fixed, or
the extent to which such a fix has propagated into distro versions. At the
least, a VM with old user space code might break if the kernel is upgraded
to one with this patch. How do people see the risks now that it is 6 years
later? I don't have enough data to make an assessment.

Michael

> +	return ida_alloc_range(&pci_domain_nr_dynamic_ida, 0x10000, INT_MAX,
> +			       GFP_KERNEL);
> +}
> +EXPORT_SYMBOL_GPL(pci_bus_find_emul_domain_nr);
> +
> +void pci_bus_release_emul_domain_nr(int domain_nr)
> +{
> +	ida_free(&pci_domain_nr_dynamic_ida, domain_nr);
> +}
> +EXPORT_SYMBOL_GPL(pci_bus_release_emul_domain_nr);
> +#endif
> +
>  #ifdef CONFIG_PCI_DOMAINS_GENERIC
>  static DEFINE_IDA(pci_domain_nr_static_ida);
> -static DEFINE_IDA(pci_domain_nr_dynamic_ida);
> 
>  static void of_pci_reserve_static_domain_nr(void)
>  {
> diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
> index 4b8693ec9e4c..e94978c3be3d 100644
> --- a/drivers/pci/probe.c
> +++ b/drivers/pci/probe.c
> @@ -632,6 +632,11 @@ static void pci_release_host_bridge_dev(struct device *dev)
> 
>  	pci_free_resource_list(&bridge->windows);
>  	pci_free_resource_list(&bridge->dma_ranges);
> +
> +	/* Host bridges only have domain_nr set in the emulation case */
> +	if (bridge->domain_nr != PCI_DOMAIN_NR_NOT_SET)
> +		pci_bus_release_emul_domain_nr(bridge->domain_nr);
> +
>  	kfree(bridge);
>  }
> 
> @@ -1112,7 +1117,8 @@ static int pci_register_host_bridge(struct pci_host_bridge *bridge)
>  	device_del(&bridge->dev);
>  free:
>  #ifdef CONFIG_PCI_DOMAINS_GENERIC
> -	pci_bus_release_domain_nr(parent, bus->domain_nr);
> +	if (bridge->domain_nr == PCI_DOMAIN_NR_NOT_SET)
> +		pci_bus_release_domain_nr(parent, bus->domain_nr);
>  #endif
>  	if (bus_registered)
>  		put_device(&bus->dev);
> diff --git a/include/linux/pci.h b/include/linux/pci.h
> index 05e68f35f392..f6a713da5c49 100644
> --- a/include/linux/pci.h
> +++ b/include/linux/pci.h
> @@ -1934,10 +1934,14 @@ DEFINE_GUARD(pci_dev, struct pci_dev *,
> pci_dev_lock(_T), pci_dev_unlock(_T))
>   */
>  #ifdef CONFIG_PCI_DOMAINS
>  extern int pci_domains_supported;
> +int pci_bus_find_emul_domain_nr(int hint);
> +void pci_bus_release_emul_domain_nr(int domain_nr);
>  #else
>  enum { pci_domains_supported = 0 };
>  static inline int pci_domain_nr(struct pci_bus *bus) { return 0; }
>  static inline int pci_proc_domain(struct pci_bus *bus) { return 0; }
> +static inline int pci_bus_find_emul_domain_nr(int hint) { return 0; }
> +static inline void pci_bus_release_emul_domain_nr(int domain_nr) { }
>  #endif /* CONFIG_PCI_DOMAINS */
> 
>  /*
> --
> 2.50.1
> 


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ