lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAHNKnsTVGZXF_kUU5YgWmM64_8yAE75=2w1H2A40Wb0y=n8YMg@mail.gmail.com>
Date:   Tue, 30 Aug 2022 05:02:49 +0300
From:   Sergey Ryazanov <ryazanov.s.a@...il.com>
To:     M Chetan Kumar <m.chetan.kumar@...el.com>
Cc:     netdev@...r.kernel.org, Jakub Kicinski <kuba@...nel.org>,
        David Miller <davem@...emloft.net>,
        Johannes Berg <johannes@...solutions.net>,
        Loic Poulain <loic.poulain@...aro.org>,
        "Sudi, Krishna C" <krishna.c.sudi@...el.com>,
        M Chetan Kumar <m.chetan.kumar@...ux.intel.com>,
        Intel Corporation <linuxwwan@...el.com>,
        Haijun Liu <haijun.liu@...iatek.com>,
        Madhusmita Sahu <madhusmita.sahu@...el.com>,
        Ricardo Martinez <ricardo.martinez@...ux.intel.com>,
        Devegowda Chandrashekar <chandrashekar.devegowda@...el.com>
Subject: Re: [PATCH net-next 3/5] net: wwan: t7xx: PCIe reset rescan

On Tue, Aug 16, 2022 at 7:12 AM <m.chetan.kumar@...el.com> wrote:
> From: Haijun Liu <haijun.liu@...iatek.com>
>
> PCI rescan module implements "rescan work queue". In firmware flashing
> or coredump collection procedure WWAN device is programmed to boot in
> fastboot mode and a work item is scheduled for removal & detection.
> The WWAN device is reset using APCI call as part driver removal flow.
> Work queue rescans pci bus at fixed interval for device detection,
> later when device is detect work queue exits.

[skipped]

> diff --git a/drivers/net/wwan/t7xx/t7xx_pci.c b/drivers/net/wwan/t7xx/t7xx_pci.c
> index 871f2a27a398..2f5c6fbe601e 100644
> --- a/drivers/net/wwan/t7xx/t7xx_pci.c
> +++ b/drivers/net/wwan/t7xx/t7xx_pci.c
> @@ -715,8 +716,11 @@ static int t7xx_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>                 return ret;
>         }
>
> +       t7xx_rescan_done();
>         t7xx_pcie_mac_set_int(t7xx_dev, MHCCIF_INT);
>         t7xx_pcie_mac_interrupts_en(t7xx_dev);
> +       if (!t7xx_dev->hp_enable)
> +               pci_ignore_hotplug(pdev);

pci_ignore_hotplug() also disables hotplug events for a parent bridge.
Is that how this call was intended?

>
>         return 0;
>  }

[skipped]

> +static void __exit t7xx_pci_cleanup(void)
> +{
> +       int remove_flag = 0;
> +       struct device *dev;
> +
> +       dev = driver_find_device(&t7xx_pci_driver.driver, NULL, NULL, t7xx_always_match);
> +       if (dev) {
> +               pr_debug("unregister t7xx PCIe driver while device is still exist.\n");
> +               put_device(dev);
> +               remove_flag = 1;
> +       } else {
> +               pr_debug("no t7xx PCIe driver found.\n");
> +       }
> +
> +       pci_lock_rescan_remove();
> +       pci_unregister_driver(&t7xx_pci_driver);
> +       pci_unlock_rescan_remove();
> +       t7xx_rescan_deinit();
> +
> +       if (remove_flag) {
> +               pr_debug("remove t7xx PCI device\n");
> +               pci_stop_and_remove_bus_device_locked(to_pci_dev(dev));
> +       }

What is the purpose of these operations? Should not the PCI core do
this for us on the driver unregister?

> +}
> +
> +module_exit(t7xx_pci_cleanup);
>
>  MODULE_AUTHOR("MediaTek Inc");
>  MODULE_DESCRIPTION("MediaTek PCIe 5G WWAN modem T7xx driver");

[skipped]

> diff --git a/drivers/net/wwan/t7xx/t7xx_pci_rescan.c b/drivers/net/wwan/t7xx/t7xx_pci_rescan.c
> new file mode 100644
> index 000000000000..045777d8a843
> --- /dev/null
> +++ b/drivers/net/wwan/t7xx/t7xx_pci_rescan.c

[skipped]

> +void t7xx_pci_dev_rescan(void)
> +{
> +       struct pci_bus *b = NULL;
> +
> +       pci_lock_rescan_remove();
> +       while ((b = pci_find_next_bus(b)))
> +               pci_rescan_bus(b);

The driver does not need to rescan all buses. The device should appear
on the same bus, so the driver just needs to rescan a single and
already known bus.

> +
> +       pci_unlock_rescan_remove();
> +}

[skipped]

> +static void t7xx_remove_rescan(struct work_struct *work)
> +{
> +       struct pci_dev *pdev;
> +       int num_retries = RESCAN_RETRIES;
> +       unsigned long flags;
> +
> +       spin_lock_irqsave(&g_mtk_rescan_context.dev_lock, flags);
> +       g_mtk_rescan_context.rescan_done = 0;
> +       pdev = g_mtk_rescan_context.dev;
> +       spin_unlock_irqrestore(&g_mtk_rescan_context.dev_lock, flags);
> +
> +       if (pdev) {
> +               pci_stop_and_remove_bus_device_locked(pdev);

What is the purpose of removing the device then trying to find it by
rescanning the bus? Would not it be easier to save a PCI device
configuration, reset the device, and then restore the configuration?

The DEVLINK_CMD_RELOAD description states that this command performs
(see include/uapi/linux/devlink.h):

Hot driver reload, makes configuration changes take place. The
devlink instance is not released during the process.

And the devlink_reload() function in net/core/devlink.c is able to
survive the devlink structure memory freeing only by accident. But the
PCI device removing should do exactly that: call the device removing
callback, which will release the devlink instance.

> +               pr_debug("start remove and rescan flow\n");
> +       }
> +
> +       do {
> +               t7xx_pci_dev_rescan();
> +               spin_lock_irqsave(&g_mtk_rescan_context.dev_lock, flags);
> +               if (g_mtk_rescan_context.rescan_done) {
> +                       spin_unlock_irqrestore(&g_mtk_rescan_context.dev_lock, flags);
> +                       break;
> +               }
> +
> +               spin_unlock_irqrestore(&g_mtk_rescan_context.dev_lock, flags);
> +               msleep(DELAY_RESCAN_MTIME);
> +       } while (num_retries--);
> +}

[skipped]

> diff --git a/drivers/net/wwan/t7xx/t7xx_port_wwan.c b/drivers/net/wwan/t7xx/t7xx_port_wwan.c
> index e53651ee2005..dfd7fb487fc0 100644
> --- a/drivers/net/wwan/t7xx/t7xx_port_wwan.c
> +++ b/drivers/net/wwan/t7xx/t7xx_port_wwan.c
> @@ -156,6 +156,12 @@ static void t7xx_port_wwan_md_state_notify(struct t7xx_port *port, unsigned int
>  {
>         const struct t7xx_port_conf *port_conf = port->port_conf;
>
> +       if (state == MD_STATE_EXCEPTION) {
> +               if (port->wwan_port)
> +                       wwan_port_txoff(port->wwan_port);
> +               return;
> +       }
> +

Looks unrelated to the patch description. Does this hunk really belong
to the patch?

>         if (state != MD_STATE_READY)
>                 return;
>

--
Sergey

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ