lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Sun, 24 Oct 2021 13:16:57 +0100
From:   Marc Zyngier <maz@...nel.org>
To:     Sunil Muthuswamy <sunilmut@...ux.microsoft.com>
Cc:     kys@...rosoft.com, haiyangz@...rosoft.com, sthemmin@...rosoft.com,
        wei.liu@...nel.org, decui@...rosoft.com, tglx@...utronix.de,
        mingo@...hat.com, bp@...en8.de, hpa@...or.com,
        lorenzo.pieralisi@....com, robh@...nel.org, kw@...ux.com,
        bhelgaas@...gle.com, arnd@...db.de, x86@...nel.org,
        linux-kernel@...r.kernel.org, linux-hyperv@...r.kernel.org,
        linux-pci@...r.kernel.org, linux-arch@...r.kernel.org,
        Sunil Muthuswamy <sunilmut@...rosoft.com>
Subject: Re: [PATCH v3 1/2] PCI: hv: Make the code arch neutral by adding arch specific interfaces

On Thu, 14 Oct 2021 16:53:13 +0100,
Sunil Muthuswamy <sunilmut@...ux.microsoft.com> wrote:
> 
> From: Sunil Muthuswamy <sunilmut@...rosoft.com>
> 
> Encapsulate arch dependencies in Hyper-V vPCI through a set of interfaces,
> listed below. Adding these arch specific interfaces will allow for an
> implementation for other arch, such as ARM64.
> 
> Implement the interfaces for X64, which is essentially just moving over the
> current implementation.

Nit: use architecture names and capitalisation that match their use in
the kernel (arm64, x86) instead of the MS-specific lingo.

> 
> List of added interfaces:
>  - hv_pci_irqchip_init()
>  - hv_pci_irqchip_free()
>  - hv_msi_get_int_vector()
>  - hv_set_msi_entry_from_desc()
>  - hv_msi_prepare()
> 
> There are no functional changes expected from this patch.
> 
> Signed-off-by: Sunil Muthuswamy <sunilmut@...rosoft.com>
> ---
> In v2 & v3:
>  Changes are described in the cover letter.
> 
>  MAINTAINERS                                 |  2 +
>  arch/x86/include/asm/hyperv-tlfs.h          | 33 ++++++++++++
>  arch/x86/include/asm/mshyperv.h             |  7 ---
>  drivers/pci/controller/Makefile             |  2 +-
>  drivers/pci/controller/pci-hyperv-irqchip.c | 57 +++++++++++++++++++++
>  drivers/pci/controller/pci-hyperv-irqchip.h | 20 ++++++++
>  drivers/pci/controller/pci-hyperv.c         | 52 ++++++++++++-------
>  include/asm-generic/hyperv-tlfs.h           | 33 ------------
>  8 files changed, 146 insertions(+), 60 deletions(-)
>  create mode 100644 drivers/pci/controller/pci-hyperv-irqchip.c
>  create mode 100644 drivers/pci/controller/pci-hyperv-irqchip.h
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index ca6d6fde85cf..ba8c979c17b2 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -8688,6 +8688,8 @@ F:	drivers/iommu/hyperv-iommu.c
>  F:	drivers/net/ethernet/microsoft/
>  F:	drivers/net/hyperv/
>  F:	drivers/pci/controller/pci-hyperv-intf.c
> +F:	drivers/pci/controller/pci-hyperv-irqchip.c
> +F:	drivers/pci/controller/pci-hyperv-irqchip.h
>  F:	drivers/pci/controller/pci-hyperv.c
>  F:	drivers/scsi/storvsc_drv.c
>  F:	drivers/uio/uio_hv_generic.c
> diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
> index 2322d6bd5883..fdf3d28fbdd5 100644
> --- a/arch/x86/include/asm/hyperv-tlfs.h
> +++ b/arch/x86/include/asm/hyperv-tlfs.h
> @@ -585,6 +585,39 @@ enum hv_interrupt_type {
>  	HV_X64_INTERRUPT_TYPE_MAXIMUM           = 0x000A,
>  };
>  
> +union hv_msi_address_register {
> +	u32 as_uint32;
> +	struct {
> +		u32 reserved1:2;
> +		u32 destination_mode:1;
> +		u32 redirection_hint:1;
> +		u32 reserved2:8;
> +		u32 destination_id:8;
> +		u32 msi_base:12;
> +	};
> +} __packed;
> +
> +union hv_msi_data_register {
> +	u32 as_uint32;
> +	struct {
> +		u32 vector:8;
> +		u32 delivery_mode:3;
> +		u32 reserved1:3;
> +		u32 level_assert:1;
> +		u32 trigger_mode:1;
> +		u32 reserved2:16;
> +	};
> +} __packed;
> +
> +/* HvRetargetDeviceInterrupt hypercall */
> +union hv_msi_entry {
> +	u64 as_uint64;
> +	struct {
> +		union hv_msi_address_register address;
> +		union hv_msi_data_register data;
> +	} __packed;
> +};
> +
>  #include <asm-generic/hyperv-tlfs.h>
>  
>  #endif
> diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
> index adccbc209169..c2b9ab94408e 100644
> --- a/arch/x86/include/asm/mshyperv.h
> +++ b/arch/x86/include/asm/mshyperv.h
> @@ -176,13 +176,6 @@ bool hv_vcpu_is_preempted(int vcpu);
>  static inline void hv_apic_init(void) {}
>  #endif
>  
> -static inline void hv_set_msi_entry_from_desc(union hv_msi_entry *msi_entry,
> -					      struct msi_desc *msi_desc)
> -{
> -	msi_entry->address.as_uint32 = msi_desc->msg.address_lo;
> -	msi_entry->data.as_uint32 = msi_desc->msg.data;
> -}
> -
>  struct irq_domain *hv_create_pci_msi_domain(void);
>  
>  int hv_map_ioapic_interrupt(int ioapic_id, bool level, int vcpu, int vector,
> diff --git a/drivers/pci/controller/Makefile b/drivers/pci/controller/Makefile
> index aaf30b3dcc14..2c301d0fc23b 100644
> --- a/drivers/pci/controller/Makefile
> +++ b/drivers/pci/controller/Makefile
> @@ -2,7 +2,7 @@
>  obj-$(CONFIG_PCIE_CADENCE) += cadence/
>  obj-$(CONFIG_PCI_FTPCI100) += pci-ftpci100.o
>  obj-$(CONFIG_PCI_IXP4XX) += pci-ixp4xx.o
> -obj-$(CONFIG_PCI_HYPERV) += pci-hyperv.o
> +obj-$(CONFIG_PCI_HYPERV) += pci-hyperv.o pci-hyperv-irqchip.o
>  obj-$(CONFIG_PCI_HYPERV_INTERFACE) += pci-hyperv-intf.o
>  obj-$(CONFIG_PCI_MVEBU) += pci-mvebu.o
>  obj-$(CONFIG_PCI_AARDVARK) += pci-aardvark.o
> diff --git a/drivers/pci/controller/pci-hyperv-irqchip.c b/drivers/pci/controller/pci-hyperv-irqchip.c
> new file mode 100644
> index 000000000000..36fa862f8bc5
> --- /dev/null
> +++ b/drivers/pci/controller/pci-hyperv-irqchip.c
> @@ -0,0 +1,57 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Hyper-V vPCI irqchip.
> + *
> + * Copyright (C) 2021, Microsoft, Inc.
> + *
> + * Author : Sunil Muthuswamy <sunilmut@...rosoft.com>
> + */
> +
> +#include <asm/mshyperv.h>
> +#include <linux/acpi.h>
> +#include <linux/irqdomain.h>
> +#include <linux/irq.h>
> +#include <linux/msi.h>
> +
> +#ifdef CONFIG_X86_64
> +int hv_pci_irqchip_init(struct irq_domain **parent_domain,
> +			bool *fasteoi_handler,
> +			u8 *delivery_mode)
> +{
> +	*parent_domain = x86_vector_domain;
> +	*fasteoi_handler = false;
> +	*delivery_mode = APIC_DELIVERY_MODE_FIXED;
> +
> +	return 0;
> +}
> +EXPORT_SYMBOL(hv_pci_irqchip_init);

Why do you need to export any of these symbols? Even if the two
objects are compiled separately, there is absolutely no need to make
them two separate modules.

Also, returning 3 values like this makes little sense. Pass a pointer
to the structure that requires them and populate it as required. Or
simply #define those that are constants.

> +
> +void hv_pci_irqchip_free(void) {}
> +EXPORT_SYMBOL(hv_pci_irqchip_free);
> +
> +unsigned int hv_msi_get_int_vector(struct irq_data *data)
> +{
> +	struct irq_cfg *cfg = irqd_cfg(data);
> +
> +	return cfg->vector;
> +}
> +EXPORT_SYMBOL(hv_msi_get_int_vector);
> +
> +void hv_set_msi_entry_from_desc(union hv_msi_entry *msi_entry,
> +				struct msi_desc *msi_desc)
> +{
> +	msi_entry->address.as_uint32 = msi_desc->msg.address_lo;
> +	msi_entry->data.as_uint32 = msi_desc->msg.data;
> +}
> +EXPORT_SYMBOL(hv_set_msi_entry_from_desc);
> +
> +int hv_msi_prepare(struct irq_domain *domain, struct device *dev,
> +		   int nvec, msi_alloc_info_t *info)
> +{
> +	return pci_msi_prepare(domain, dev, nvec, info);
> +}
> +EXPORT_SYMBOL(hv_msi_prepare);

This looks like a very unnecessary level of indirection, given that
you end-up with an empty callback in the arm64 code. The following
works just as well and avoids useless callbacks:

#ifdef CONFIG_ARM64
#define pci_msi_prepare	NULL
#endif

I also wish that pci_msi_prepare was called x86_pci_msi_prepare, but
that's another debate...

> +
> +#endif
> +
> +MODULE_LICENSE("GPL v2");
> diff --git a/drivers/pci/controller/pci-hyperv-irqchip.h b/drivers/pci/controller/pci-hyperv-irqchip.h
> new file mode 100644
> index 000000000000..00549809e6c4
> --- /dev/null
> +++ b/drivers/pci/controller/pci-hyperv-irqchip.h
> @@ -0,0 +1,20 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * Architecture specific vector management for the Hyper-V vPCI.
> + *
> + * Copyright (C) 2021, Microsoft, Inc.
> + *
> + * Author : Sunil Muthuswamy <sunilmut@...rosoft.com>
> + */
> +
> +int hv_pci_irqchip_init(struct irq_domain **parent_domain,
> +			bool *fasteoi_handler,
> +			u8 *delivery_mode);
> +
> +void hv_pci_irqchip_free(void);
> +unsigned int hv_msi_get_int_vector(struct irq_data *data);
> +void hv_set_msi_entry_from_desc(union hv_msi_entry *msi_entry,
> +				struct msi_desc *msi_desc);
> +
> +int hv_msi_prepare(struct irq_domain *domain, struct device *dev,
> +		   int nvec, msi_alloc_info_t *info);
> diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c
> index eaec915ffe62..2d3916206986 100644
> --- a/drivers/pci/controller/pci-hyperv.c
> +++ b/drivers/pci/controller/pci-hyperv.c
> @@ -43,14 +43,12 @@
>  #include <linux/pci-ecam.h>
>  #include <linux/delay.h>
>  #include <linux/semaphore.h>
> -#include <linux/irqdomain.h>
> -#include <asm/irqdomain.h>
> -#include <asm/apic.h>
>  #include <linux/irq.h>
>  #include <linux/msi.h>
>  #include <linux/hyperv.h>
>  #include <linux/refcount.h>
>  #include <asm/mshyperv.h>
> +#include "pci-hyperv-irqchip.h"
>  
>  /*
>   * Protocol versions. The low word is the minor version, the high word the
> @@ -81,6 +79,10 @@ static enum pci_protocol_version_t pci_protocol_versions[] = {
>  	PCI_PROTOCOL_VERSION_1_1,
>  };
>  
> +static struct irq_domain *parent_domain;
> +static bool fasteoi;
> +static u8 delivery_mode;

See my earlier comment about how clumsy this is.

> +
>  #define PCI_CONFIG_MMIO_LENGTH	0x2000
>  #define CFG_PAGE_OFFSET 0x1000
>  #define CFG_PAGE_SIZE (PCI_CONFIG_MMIO_LENGTH - CFG_PAGE_OFFSET)
> @@ -1217,7 +1219,6 @@ static void hv_irq_mask(struct irq_data *data)
>  static void hv_irq_unmask(struct irq_data *data)
>  {
>  	struct msi_desc *msi_desc = irq_data_get_msi_desc(data);
> -	struct irq_cfg *cfg = irqd_cfg(data);
>  	struct hv_retarget_device_interrupt *params;
>  	struct hv_pcibus_device *hbus;
>  	struct cpumask *dest;
> @@ -1246,11 +1247,12 @@ static void hv_irq_unmask(struct irq_data *data)
>  			   (hbus->hdev->dev_instance.b[7] << 8) |
>  			   (hbus->hdev->dev_instance.b[6] & 0xf8) |
>  			   PCI_FUNC(pdev->devfn);
> -	params->int_target.vector = cfg->vector;
> +	params->int_target.vector = hv_msi_get_int_vector(data);
>  
>  	/*
> -	 * Honoring apic->delivery_mode set to APIC_DELIVERY_MODE_FIXED by
> -	 * setting the HV_DEVICE_INTERRUPT_TARGET_MULTICAST flag results in a
> +	 * For x64, honoring apic->delivery_mode set to
> +	 * APIC_DELIVERY_MODE_FIXED by setting the
> +	 * HV_DEVICE_INTERRUPT_TARGET_MULTICAST flag results in a
>  	 * spurious interrupt storm. Not doing so does not seem to have a
>  	 * negative effect (yet?).

And what does it mean on other architectures?

>  	 */
> @@ -1347,7 +1349,7 @@ static u32 hv_compose_msi_req_v1(
>  	int_pkt->wslot.slot = slot;
>  	int_pkt->int_desc.vector = vector;
>  	int_pkt->int_desc.vector_count = 1;
> -	int_pkt->int_desc.delivery_mode = APIC_DELIVERY_MODE_FIXED;
> +	int_pkt->int_desc.delivery_mode = delivery_mode;
>  
>  	/*
>  	 * Create MSI w/ dummy vCPU set, overwritten by subsequent retarget in
> @@ -1377,7 +1379,7 @@ static u32 hv_compose_msi_req_v2(
>  	int_pkt->wslot.slot = slot;
>  	int_pkt->int_desc.vector = vector;
>  	int_pkt->int_desc.vector_count = 1;
> -	int_pkt->int_desc.delivery_mode = APIC_DELIVERY_MODE_FIXED;
> +	int_pkt->int_desc.delivery_mode = delivery_mode;
>  	cpu = hv_compose_msi_req_get_cpu(affinity);
>  	int_pkt->int_desc.processor_array[0] =
>  		hv_cpu_number_to_vp_number(cpu);
> @@ -1397,7 +1399,7 @@ static u32 hv_compose_msi_req_v3(
>  	int_pkt->int_desc.vector = vector;
>  	int_pkt->int_desc.reserved = 0;
>  	int_pkt->int_desc.vector_count = 1;
> -	int_pkt->int_desc.delivery_mode = APIC_DELIVERY_MODE_FIXED;
> +	int_pkt->int_desc.delivery_mode = delivery_mode;
>  	cpu = hv_compose_msi_req_get_cpu(affinity);
>  	int_pkt->int_desc.processor_array[0] =
>  		hv_cpu_number_to_vp_number(cpu);
> @@ -1419,7 +1421,6 @@ static u32 hv_compose_msi_req_v3(
>   */
>  static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
>  {
> -	struct irq_cfg *cfg = irqd_cfg(data);
>  	struct hv_pcibus_device *hbus;
>  	struct vmbus_channel *channel;
>  	struct hv_pci_dev *hpdev;
> @@ -1470,7 +1471,7 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
>  		size = hv_compose_msi_req_v1(&ctxt.int_pkts.v1,
>  					dest,
>  					hpdev->desc.win_slot.slot,
> -					cfg->vector);
> +					hv_msi_get_int_vector(data));
>  		break;
>  
>  	case PCI_PROTOCOL_VERSION_1_2:
> @@ -1478,14 +1479,14 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
>  		size = hv_compose_msi_req_v2(&ctxt.int_pkts.v2,
>  					dest,
>  					hpdev->desc.win_slot.slot,
> -					cfg->vector);
> +					hv_msi_get_int_vector(data));
>  		break;
>  
>  	case PCI_PROTOCOL_VERSION_1_4:
>  		size = hv_compose_msi_req_v3(&ctxt.int_pkts.v3,
>  					dest,
>  					hpdev->desc.win_slot.slot,
> -					cfg->vector);
> +					hv_msi_get_int_vector(data));
>  		break;
>  
>  	default:
> @@ -1601,7 +1602,7 @@ static struct irq_chip hv_msi_irq_chip = {
>  };
>  
>  static struct msi_domain_ops hv_msi_ops = {
> -	.msi_prepare	= pci_msi_prepare,
> +	.msi_prepare	= hv_msi_prepare,
>  	.msi_free	= hv_msi_free,
>  };
>  
> @@ -1625,12 +1626,13 @@ static int hv_pcie_init_irq_domain(struct hv_pcibus_device *hbus)
>  	hbus->msi_info.flags = (MSI_FLAG_USE_DEF_DOM_OPS |
>  		MSI_FLAG_USE_DEF_CHIP_OPS | MSI_FLAG_MULTI_PCI_MSI |
>  		MSI_FLAG_PCI_MSIX);
> -	hbus->msi_info.handler = handle_edge_irq;
> -	hbus->msi_info.handler_name = "edge";
> +	hbus->msi_info.handler =
> +		fasteoi ? handle_fasteoi_irq : handle_edge_irq;
> +	hbus->msi_info.handler_name = fasteoi ? "fasteoi" : "edge";

The fact that you somehow need to know what the GIC is using as a flow
handler is a sure sign that you are doing something wrong. In a
hierarchical setup, only the root of the hierarchy should ever know
about that. Having anything there is actively wrong.

>  	hbus->msi_info.data = hbus;
>  	hbus->irq_domain = pci_msi_create_irq_domain(hbus->fwnode,
>  						     &hbus->msi_info,
> -						     x86_vector_domain);
> +						     parent_domain);
>  	if (!hbus->irq_domain) {
>  		dev_err(&hbus->hdev->device,
>  			"Failed to build an MSI IRQ domain\n");
> @@ -3531,13 +3533,21 @@ static void __exit exit_hv_pci_drv(void)
>  	hvpci_block_ops.read_block = NULL;
>  	hvpci_block_ops.write_block = NULL;
>  	hvpci_block_ops.reg_blk_invalidate = NULL;
> +
> +	hv_pci_irqchip_free();
>  }
>  
>  static int __init init_hv_pci_drv(void)
>  {
> +	int ret;
> +
>  	if (!hv_is_hyperv_initialized())
>  		return -ENODEV;
>  
> +	ret = hv_pci_irqchip_init(&parent_domain, &fasteoi, &delivery_mode);
> +	if (ret)
> +		return ret;

Having established that the fasteoi thing is nothing but a bug, that
the delivery_mode is a constant, and that all that matters is actually
the parent domain which is a global pointer on x86, and something that
gets allocated on arm64, you can greatly simplify the whole thing:

#ifdef CONFIG_X86
#define DELIVERY_MODE	APIC_DELIVERY_MODE_FIXED
#define FLOW_HANDLER	handle_edge_irq
#define FLOW_NAME	"edge"

static struct irq_domain *hv_pci_get_root_domain(void)
{
	return x86_vector_domain;
}
#endif

#ifdef CONFIG_ARM64
#define DELIVERY_MODE	0
#define FLOW_HANDLER	NULL
#define FLOW_NAME	NULL
#define pci_msi_prepare	NULL

static struct irq_domain *hv_pci_get_root_domain(void)
{
	[...]
}
#endif

as once you look at it seriously, the whole "separate file for the IRQ
code" is totally unnecessary (as Michael pointed out earlier), because
the abstractions you are adding are for most of them unnecessary.

> +
>  	/* Set the invalid domain number's bit, so it will not be used */
>  	set_bit(HVPCI_DOM_INVALID, hvpci_dom_map);
>  
> @@ -3546,7 +3556,11 @@ static int __init init_hv_pci_drv(void)
>  	hvpci_block_ops.write_block = hv_write_config_block;
>  	hvpci_block_ops.reg_blk_invalidate = hv_register_block_invalidate;
>  
> -	return vmbus_driver_register(&hv_pci_drv);
> +	ret = vmbus_driver_register(&hv_pci_drv);
> +	if (ret)
> +		hv_pci_irqchip_free();
> +
> +	return ret;
>  }
>  
>  module_init(init_hv_pci_drv);
> diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h
> index 56348a541c50..45cc0c3b8ed7 100644
> --- a/include/asm-generic/hyperv-tlfs.h
> +++ b/include/asm-generic/hyperv-tlfs.h
> @@ -539,39 +539,6 @@ enum hv_interrupt_source {
>  	HV_INTERRUPT_SOURCE_IOAPIC,
>  };
>  
> -union hv_msi_address_register {
> -	u32 as_uint32;
> -	struct {
> -		u32 reserved1:2;
> -		u32 destination_mode:1;
> -		u32 redirection_hint:1;
> -		u32 reserved2:8;
> -		u32 destination_id:8;
> -		u32 msi_base:12;
> -	};
> -} __packed;
> -
> -union hv_msi_data_register {
> -	u32 as_uint32;
> -	struct {
> -		u32 vector:8;
> -		u32 delivery_mode:3;
> -		u32 reserved1:3;
> -		u32 level_assert:1;
> -		u32 trigger_mode:1;
> -		u32 reserved2:16;
> -	};
> -} __packed;
> -
> -/* HvRetargetDeviceInterrupt hypercall */
> -union hv_msi_entry {
> -	u64 as_uint64;
> -	struct {
> -		union hv_msi_address_register address;
> -		union hv_msi_data_register data;
> -	} __packed;
> -};
> -
>  union hv_ioapic_rte {
>  	u64 as_uint64;
>  

Thanks,

	M.

-- 
Without deviation from the norm, progress is not possible.

Powered by blists - more mailing lists