lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <tzr54an2wvrnf5vnhwkbafwgnik5evcssa2wskx5txhxtgqh6v@wy7dri5muaex>
Date: Tue, 4 Nov 2025 10:13:56 +0900
From: Koichiro Den <den@...inux.co.jp>
To: Frank Li <Frank.li@....com>
Cc: ntb@...ts.linux.dev, linux-pci@...r.kernel.org, 
	linux-kernel@...r.kernel.org, jdmason@...zu.us, dave.jiang@...el.com, allenbh@...il.com, 
	mani@...nel.org, kwilczynski@...nel.org, kishon@...nel.org, bhelgaas@...gle.com, 
	jbrunet@...libre.com, lpieralisi@...nel.org, yebin10@...wei.com, 
	geert+renesas@...der.be, arnd@...db.de
Subject: Re: [PATCH v2 6/6] PCI: endpoint: pci-epf-vntb: manage ntb_dev
 lifetime and fix vpci bus teardown

On Fri, Oct 31, 2025 at 03:52:22PM -0400, Frank Li wrote:
> On Thu, Oct 30, 2025 at 11:20:22AM +0900, Koichiro Den wrote:
> > On Wed, Oct 29, 2025 at 12:53:40PM -0400, Frank Li wrote:
> > > On Wed, Oct 29, 2025 at 05:03:21PM +0900, Koichiro Den wrote:
> > > > Currently ntb_dev is embedded in epf_ntb, while configfs allows starting
> > > > or stopping the controller and linking or unlinking functions at will.
> > > > In practice, re-linking and restarting are not possible with the embedded
> > > > design and lead to oopses.
> > > >
> > > > Allocate ntb_dev with devm and add a .remove callback to the pci driver
> > > > that calls ntb_unregister_device(). This allows a fresh device to be
> > > > created on the next .bind call.
> > > >
> > > > With these changes, the controller can now be stopped, a function
> > > > unlinked, configfs settings updated, and the controller re-linked and
> > > > restarted without rebooting the endpoint, as long as the underlying
> > > > pci_epc_ops .stop() operation is non-destructive, and .start() can
> > > > restore normal operations.
> > > >
> > > > Signed-off-by: Koichiro Den <den@...inux.co.jp>
> > > > ---
> > > >  drivers/pci/endpoint/functions/pci-epf-vntb.c | 66 +++++++++++++++----
> > > >  1 file changed, 52 insertions(+), 14 deletions(-)
> > > >
> > > > diff --git a/drivers/pci/endpoint/functions/pci-epf-vntb.c b/drivers/pci/endpoint/functions/pci-epf-vntb.c
> > > > index 750a246f79c9..3059ed85a955 100644
> > > > --- a/drivers/pci/endpoint/functions/pci-epf-vntb.c
> > > > +++ b/drivers/pci/endpoint/functions/pci-epf-vntb.c
> > > > @@ -118,7 +118,7 @@ struct epf_ntb_ctrl {
> > > >  } __packed;
> > > >
> > > >  struct epf_ntb {
> > > > -	struct ntb_dev ntb;
> > > > +	struct ntb_dev *ntb;
> > > >  	struct pci_epf *epf;
> > > >  	struct config_group group;
> > > >
> > > > @@ -144,10 +144,16 @@ struct epf_ntb {
> > > >  	void __iomem *vpci_mw_addr[MAX_MW];
> > > >
> > > >  	struct delayed_work cmd_handler;
> > > > +
> > > > +	struct pci_bus *vpci_bus;
> > > >  };
> > > >
> > > >  #define to_epf_ntb(epf_group) container_of((epf_group), struct epf_ntb, group)
> > > > -#define ntb_ndev(__ntb) container_of(__ntb, struct epf_ntb, ntb)
> > > > +
> > > > +static inline struct epf_ntb *ntb_ndev(struct ntb_dev *ntb)
> > > > +{
> > > > +	return (struct epf_ntb *)ntb->pdev->sysdata;
> > > > +}
> > > >
> > > >  static struct pci_epf_header epf_ntb_header = {
> > > >  	.vendorid	= PCI_ANY_ID,
> > > > @@ -173,7 +179,7 @@ static int epf_ntb_link_up(struct epf_ntb *ntb, bool link_up)
> > > >  	else
> > > >  		ntb->reg->link_status &= ~LINK_STATUS_UP;
> > > >
> > > > -	ntb_link_event(&ntb->ntb);
> > > > +	ntb_link_event(ntb->ntb);
> > > >  	return 0;
> > > >  }
> > > >
> > > > @@ -261,7 +267,7 @@ static void epf_ntb_cmd_handler(struct work_struct *work)
> > > >  	for (i = 1; i < ntb->db_count; i++) {
> > > >  		if (ntb->epf_db[i]) {
> > > >  			ntb->db |= 1 << (i - 1);
> > > > -			ntb_db_event(&ntb->ntb, i);
> > > > +			ntb_db_event(ntb->ntb, i);
> > > >  			ntb->epf_db[i] = 0;
> > > >  		}
> > > >  	}
> > > > @@ -1097,12 +1103,24 @@ static int vpci_scan_bus(void *sysdata)
> > > >  {
> > > >  	struct pci_bus *vpci_bus;
> > > >  	struct epf_ntb *ndev = sysdata;
> > > > -
> > > > -	vpci_bus = pci_scan_bus(ndev->vbus_number, &vpci_ops, sysdata);
> > > > +	LIST_HEAD(resources);
> > > > +	static struct resource busn_res = {
> > > > +		.start = 0,
> > > > +		.end = 255,
> > > > +		.flags = IORESOURCE_BUS,
> > > > +	};
> > > > +
> > > > +	pci_add_resource(&resources, &ioport_resource);
> > > > +	pci_add_resource(&resources, &iomem_resource);
> > > > +	pci_add_resource(&resources, &busn_res);
> > > > +
> > > > +	vpci_bus = pci_scan_root_bus(&ndev->epf->epc->dev, ndev->vbus_number,
> > > > +				     &vpci_ops, sysdata, &resources);
> > >
> > > It looks like this part does not belong in this patch. Does it just change
> > > the API from pci_scan_bus() to pci_scan_root_bus()?
> >
> > To make things work symmetrically and avoid crashes (when unlinking),
> > pci_scan_bus() needed to be switched to pci_scan_root_bus() so that the
> > parent device is set, as the no-longer-existing pci_scan_bus_parented()
> > used to do. Otherwise,
> > pci_epf_unbind()->epf_ntb_unbind()->pci_remove_root_bus()->pci_bus_release_domain_nr()
> > would crash.
> >
> > Perhaps I should've added an explanation for this in the git commit message?
> 
> Can you create a new patch that just converts pci_scan_bus() to
> pci_scan_root_bus(), and put the explanation in that patch's commit message?

Ok, I'll do so. Let me respin the series later (v3).
Thanks for the review.

-Koichiro

> 
> Frank
> 
> >
> > Thanks for reviewing.
> >
> > -Koichiro
> >
> > >
> > > Frank
> > >
> > > >  	if (!vpci_bus) {
> > > >  		pr_err("create pci bus failed\n");
> > > >  		return -EINVAL;
> > > >  	}
> > > > +	ndev->vpci_bus = vpci_bus;
> > > >
> > > >  	pci_bus_add_devices(vpci_bus);
> > > >
> > > > @@ -1147,7 +1165,7 @@ static int vntb_epf_mw_set_trans(struct ntb_dev *ndev, int pidx, int idx,
> > > >  	int ret;
> > > >  	struct device *dev;
> > > >
> > > > -	dev = &ntb->ntb.dev;
> > > > +	dev = &ntb->ntb->dev;
> > > >  	barno = ntb->epf_ntb_bar[BAR_MW1 + idx];
> > > >  	epf_bar = &ntb->epf->bar[barno];
> > > >  	epf_bar->phys_addr = addr;
> > > > @@ -1247,7 +1265,7 @@ static int vntb_epf_peer_db_set(struct ntb_dev *ndev, u64 db_bits)
> > > >  	ret = pci_epc_raise_irq(ntb->epf->epc, func_no, vfunc_no,
> > > >  				PCI_IRQ_MSI, interrupt_num + 1);
> > > >  	if (ret)
> > > > -		dev_err(&ntb->ntb.dev, "Failed to raise IRQ\n");
> > > > +		dev_err(&ntb->ntb->dev, "Failed to raise IRQ\n");
> > > >
> > > >  	return ret;
> > > >  }
> > > > @@ -1334,9 +1352,12 @@ static int pci_vntb_probe(struct pci_dev *pdev, const struct pci_device_id *id)
> > > >  	struct epf_ntb *ndev = (struct epf_ntb *)pdev->sysdata;
> > > >  	struct device *dev = &pdev->dev;
> > > >
> > > > -	ndev->ntb.pdev = pdev;
> > > > -	ndev->ntb.topo = NTB_TOPO_NONE;
> > > > -	ndev->ntb.ops =  &vntb_epf_ops;
> > > > +	ndev->ntb = devm_kzalloc(dev, sizeof(*ndev->ntb), GFP_KERNEL);
> > > > +	if (!ndev->ntb)
> > > > +		return -ENOMEM;
> > > > +	ndev->ntb->pdev = pdev;
> > > > +	ndev->ntb->topo = NTB_TOPO_NONE;
> > > > +	ndev->ntb->ops = &vntb_epf_ops;
> > > >
> > > >  	ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32));
> > > >  	if (ret) {
> > > > @@ -1344,7 +1365,7 @@ static int pci_vntb_probe(struct pci_dev *pdev, const struct pci_device_id *id)
> > > >  		return ret;
> > > >  	}
> > > >
> > > > -	ret = ntb_register_device(&ndev->ntb);
> > > > +	ret = ntb_register_device(ndev->ntb);
> > > >  	if (ret) {
> > > >  		dev_err(dev, "Failed to register NTB device\n");
> > > >  		return ret;
> > > > @@ -1354,6 +1375,17 @@ static int pci_vntb_probe(struct pci_dev *pdev, const struct pci_device_id *id)
> > > >  	return 0;
> > > >  }
> > > >
> > > > +static void pci_vntb_remove(struct pci_dev *pdev)
> > > > +{
> > > > +	struct epf_ntb *ndev = (struct epf_ntb *)pdev->sysdata;
> > > > +
> > > > +	if (!ndev || !ndev->ntb)
> > > > +		return;
> > > > +
> > > > +	ntb_unregister_device(ndev->ntb);
> > > > +	ndev->ntb = NULL;
> > > > +}
> > > > +
> > > >  static struct pci_device_id pci_vntb_table[] = {
> > > >  	{
> > > >  		PCI_DEVICE(0xffff, 0xffff),
> > > > @@ -1365,6 +1397,7 @@ static struct pci_driver vntb_pci_driver = {
> > > >  	.name           = "pci-vntb",
> > > >  	.id_table       = pci_vntb_table,
> > > >  	.probe          = pci_vntb_probe,
> > > > +	.remove         = pci_vntb_remove,
> > > >  };
> > > >
> > > >  /* ============ PCIe EPF Driver Bind ====================*/
> > > > @@ -1447,10 +1480,15 @@ static void epf_ntb_unbind(struct pci_epf *epf)
> > > >  {
> > > >  	struct epf_ntb *ntb = epf_get_drvdata(epf);
> > > >
> > > > +	pci_unregister_driver(&vntb_pci_driver);
> > > > +
> > > > +	pci_lock_rescan_remove();
> > > > +	pci_stop_root_bus(ntb->vpci_bus);
> > > > +	pci_remove_root_bus(ntb->vpci_bus);
> > > > +	pci_unlock_rescan_remove();
> > > > +
> > > >  	epf_ntb_epc_cleanup(ntb);
> > > >  	epf_ntb_config_spad_bar_free(ntb);
> > > > -
> > > > -	pci_unregister_driver(&vntb_pci_driver);
> > > >  }
> > > >
> > > >  // EPF driver probe
> > > > --
> > > > 2.48.1
> > > >

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ