[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <Y2l7nJ3l1jeOebGG@Boquns-Mac-mini.local>
Date: Mon, 7 Nov 2022 13:41:48 -0800
From: Boqun Feng <boqun.feng@...il.com>
To: Michael Kelley <mikelley@...rosoft.com>
Cc: hpa@...or.com, kys@...rosoft.com, haiyangz@...rosoft.com,
sthemmin@...rosoft.com, wei.liu@...nel.org, decui@...rosoft.com,
luto@...nel.org, peterz@...radead.org, davem@...emloft.net,
edumazet@...gle.com, kuba@...nel.org, pabeni@...hat.com,
lpieralisi@...nel.org, robh@...nel.org, kw@...ux.com,
bhelgaas@...gle.com, arnd@...db.de, hch@...radead.org,
m.szyprowski@...sung.com, robin.murphy@....com,
thomas.lendacky@....com, brijesh.singh@....com, tglx@...utronix.de,
mingo@...hat.com, bp@...en8.de, dave.hansen@...ux.intel.com,
Tianyu.Lan@...rosoft.com, kirill.shutemov@...ux.intel.com,
sathyanarayanan.kuppuswamy@...ux.intel.com, ak@...ux.intel.com,
isaku.yamahata@...el.com, dan.j.williams@...el.com,
jane.chu@...cle.com, seanjc@...gle.com, tony.luck@...el.com,
x86@...nel.org, linux-kernel@...r.kernel.org,
linux-hyperv@...r.kernel.org, netdev@...r.kernel.org,
linux-pci@...r.kernel.org, linux-arch@...r.kernel.org,
iommu@...ts.linux.dev
Subject: Re: [PATCH 12/12] PCI: hv: Enable PCI pass-thru devices in
Confidential VMs
On Thu, Oct 20, 2022 at 10:57:15AM -0700, Michael Kelley wrote:
> For PCI pass-thru devices in a Confidential VM, Hyper-V requires
> that PCI config space be accessed via hypercalls. In normal VMs,
> config space accesses are trapped to the Hyper-V host and emulated.
> But in a confidential VM, the host can't access guest memory to
> decode the instruction for emulation, so an explicit hypercall must
> be used.
>
> Update the PCI config space access functions to use the hypercalls
> when such use is indicated by Hyper-V flags. Also, set the flag to
> allow the Hyper-V PCI driver to be loaded and used in a Confidential
> VM (a.k.a., "Isolation VM"). The driver has previously been hardened
> against a malicious Hyper-V host[1].
>
> [1] https://lore.kernel.org/all/20220511223207.3386-2-parri.andrea@gmail.com/
>
> Co-developed-by: Dexuan Cui <decui@...rosoft.com>
> Signed-off-by: Dexuan Cui <decui@...rosoft.com>
> Signed-off-by: Michael Kelley <mikelley@...rosoft.com>
> ---
> drivers/hv/channel_mgmt.c | 2 +-
> drivers/pci/controller/pci-hyperv.c | 153 +++++++++++++++++++++---------------
> 2 files changed, 92 insertions(+), 63 deletions(-)
>
> diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
> index 5b12040..c0f9ac2 100644
> --- a/drivers/hv/channel_mgmt.c
> +++ b/drivers/hv/channel_mgmt.c
> @@ -67,7 +67,7 @@
> { .dev_type = HV_PCIE,
> HV_PCIE_GUID,
> .perf_device = false,
> - .allowed_in_isolated = false,
> + .allowed_in_isolated = true,
> },
>
> /* Synthetic Frame Buffer */
> diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c
> index 02ebf3e..9873296 100644
> --- a/drivers/pci/controller/pci-hyperv.c
> +++ b/drivers/pci/controller/pci-hyperv.c
> @@ -514,6 +514,7 @@ struct hv_pcibus_device {
>
> /* Highest slot of child device with resources allocated */
> int wslot_res_allocated;
> + bool use_calls; /* Use hypercalls to access mmio cfg space */
>
> /* hypercall arg, must not cross page boundary */
> struct hv_retarget_device_interrupt retarget_msi_interrupt_params;
> @@ -1134,8 +1135,9 @@ static void hv_pci_write_mmio(phys_addr_t gpa, int size, u32 val)
> static void _hv_pcifront_read_config(struct hv_pci_dev *hpdev, int where,
> int size, u32 *val)
> {
> + struct hv_pcibus_device *hbus = hpdev->hbus;
> + int offset = where + CFG_PAGE_OFFSET;
> unsigned long flags;
> - void __iomem *addr = hpdev->hbus->cfg_addr + CFG_PAGE_OFFSET + where;
>
> /*
> * If the attempt is to read the IDs or the ROM BAR, simulate that.
> @@ -1163,56 +1165,74 @@ static void _hv_pcifront_read_config(struct hv_pci_dev *hpdev, int where,
> */
> *val = 0;
> } else if (where + size <= CFG_PAGE_SIZE) {
> - spin_lock_irqsave(&hpdev->hbus->config_lock, flags);
> - /* Choose the function to be read. (See comment above) */
> - writel(hpdev->desc.win_slot.slot, hpdev->hbus->cfg_addr);
> - /* Make sure the function was chosen before we start reading. */
> - mb();
> - /* Read from that function's config space. */
> - switch (size) {
> - case 1:
> - *val = readb(addr);
> - break;
> - case 2:
> - *val = readw(addr);
> - break;
> - default:
> - *val = readl(addr);
> - break;
> +
> + spin_lock_irqsave(&hbus->config_lock, flags);
> + if (hbus->use_calls) {
> + phys_addr_t addr = hbus->mem_config->start + offset;
> +
> + hv_pci_write_mmio(hbus->mem_config->start, 4,
> + hpdev->desc.win_slot.slot);
> + hv_pci_read_mmio(addr, size, val);
> + } else {
> + void __iomem *addr = hbus->cfg_addr + offset;
> +
> + /* Choose the function to be read. (See comment above) */
> + writel(hpdev->desc.win_slot.slot, hbus->cfg_addr);
> + /* Make sure the function was chosen before reading. */
> + mb();
> + /* Read from that function's config space. */
> + switch (size) {
> + case 1:
> + *val = readb(addr);
> + break;
> + case 2:
> + *val = readw(addr);
> + break;
> + default:
> + *val = readl(addr);
> + break;
> + }
An mb() is missing here?
> }
> - /*
> - * Make sure the read was done before we release the spinlock
> - * allowing consecutive reads/writes.
> - */
> - mb();
> - spin_unlock_irqrestore(&hpdev->hbus->config_lock, flags);
> + spin_unlock_irqrestore(&hbus->config_lock, flags);
> } else {
> - dev_err(&hpdev->hbus->hdev->device,
> + dev_err(&hbus->hdev->device,
> "Attempt to read beyond a function's config space.\n");
> }
> }
>
> static u16 hv_pcifront_get_vendor_id(struct hv_pci_dev *hpdev)
> {
> + struct hv_pcibus_device *hbus = hpdev->hbus;
> + u32 val;
> u16 ret;
> unsigned long flags;
> - void __iomem *addr = hpdev->hbus->cfg_addr + CFG_PAGE_OFFSET +
> - PCI_VENDOR_ID;
>
> - spin_lock_irqsave(&hpdev->hbus->config_lock, flags);
> + spin_lock_irqsave(&hbus->config_lock, flags);
>
> - /* Choose the function to be read. (See comment above) */
> - writel(hpdev->desc.win_slot.slot, hpdev->hbus->cfg_addr);
> - /* Make sure the function was chosen before we start reading. */
> - mb();
> - /* Read from that function's config space. */
> - ret = readw(addr);
> - /*
> - * mb() is not required here, because the spin_unlock_irqrestore()
> - * is a barrier.
> - */
> + if (hbus->use_calls) {
> + phys_addr_t addr = hbus->mem_config->start +
> + CFG_PAGE_OFFSET + PCI_VENDOR_ID;
> +
> + hv_pci_write_mmio(hbus->mem_config->start, 4,
> + hpdev->desc.win_slot.slot);
> + hv_pci_read_mmio(addr, 2, &val);
> + ret = val; /* Truncates to 16 bits */
> + } else {
> + void __iomem *addr = hbus->cfg_addr + CFG_PAGE_OFFSET +
> + PCI_VENDOR_ID;
> + /* Choose the function to be read. (See comment above) */
> + writel(hpdev->desc.win_slot.slot, hbus->cfg_addr);
> + /* Make sure the function was chosen before we start reading. */
> + mb();
> + /* Read from that function's config space. */
> + ret = readw(addr);
> + /*
> + * mb() is not required here, because the
> + * spin_unlock_irqrestore() is a barrier.
> + */
> + }
>
> - spin_unlock_irqrestore(&hpdev->hbus->config_lock, flags);
> + spin_unlock_irqrestore(&hbus->config_lock, flags);
>
> return ret;
> }
> @@ -1227,38 +1247,45 @@ static u16 hv_pcifront_get_vendor_id(struct hv_pci_dev *hpdev)
> static void _hv_pcifront_write_config(struct hv_pci_dev *hpdev, int where,
> int size, u32 val)
> {
> + struct hv_pcibus_device *hbus = hpdev->hbus;
> + int offset = where + CFG_PAGE_OFFSET;
> unsigned long flags;
> - void __iomem *addr = hpdev->hbus->cfg_addr + CFG_PAGE_OFFSET + where;
>
> if (where >= PCI_SUBSYSTEM_VENDOR_ID &&
> where + size <= PCI_CAPABILITY_LIST) {
> /* SSIDs and ROM BARs are read-only */
> } else if (where >= PCI_COMMAND && where + size <= CFG_PAGE_SIZE) {
> - spin_lock_irqsave(&hpdev->hbus->config_lock, flags);
> - /* Choose the function to be written. (See comment above) */
> - writel(hpdev->desc.win_slot.slot, hpdev->hbus->cfg_addr);
> - /* Make sure the function was chosen before we start writing. */
> - wmb();
> - /* Write to that function's config space. */
> - switch (size) {
> - case 1:
> - writeb(val, addr);
> - break;
> - case 2:
> - writew(val, addr);
> - break;
> - default:
> - writel(val, addr);
> - break;
> + spin_lock_irqsave(&hbus->config_lock, flags);
> +
> + if (hbus->use_calls) {
> + phys_addr_t addr = hbus->mem_config->start + offset;
> +
> + hv_pci_write_mmio(hbus->mem_config->start, 4,
> + hpdev->desc.win_slot.slot);
> + hv_pci_write_mmio(addr, size, val);
> + } else {
> + void __iomem *addr = hbus->cfg_addr + offset;
> +
> + /* Choose the function to write. (See comment above) */
> + writel(hpdev->desc.win_slot.slot, hbus->cfg_addr);
> + /* Make sure the function was chosen before writing. */
> + wmb();
> + /* Write to that function's config space. */
> + switch (size) {
> + case 1:
> + writeb(val, addr);
> + break;
> + case 2:
> + writew(val, addr);
> + break;
> + default:
> + writel(val, addr);
> + break;
> + }
Ditto, an mb() is needed here to align with the old code.
With these fixed, feel free to add
Reviewed-by: Boqun Feng <boqun.feng@...il.com>
Regards,
BOqun
> }
> - /*
> - * Make sure the write was done before we release the spinlock
> - * allowing consecutive reads/writes.
> - */
> - mb();
> - spin_unlock_irqrestore(&hpdev->hbus->config_lock, flags);
> + spin_unlock_irqrestore(&hbus->config_lock, flags);
> } else {
> - dev_err(&hpdev->hbus->hdev->device,
> + dev_err(&hbus->hdev->device,
> "Attempt to write beyond a function's config space.\n");
> }
> }
> @@ -3568,6 +3595,7 @@ static int hv_pci_probe(struct hv_device *hdev,
> hbus->bridge->domain_nr = dom;
> #ifdef CONFIG_X86
> hbus->sysdata.domain = dom;
> + hbus->use_calls = !!(ms_hyperv.hints & HV_X64_USE_MMIO_HYPERCALLS);
> #elif defined(CONFIG_ARM64)
> /*
> * Set the PCI bus parent to be the corresponding VMbus
> @@ -3577,6 +3605,7 @@ static int hv_pci_probe(struct hv_device *hdev,
> * information to devices created on the bus.
> */
> hbus->sysdata.parent = hdev->device.parent;
> + hbus->use_calls = false;
> #endif
>
> hbus->hdev = hdev;
> --
> 1.8.3.1
>
>
Powered by blists - more mailing lists