[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20200615120053.GZ2428291@smile.fi.intel.com>
Date: Mon, 15 Jun 2020 15:00:53 +0300
From: Andy Shevchenko <andriy.shevchenko@...ux.intel.com>
To: Shiju Jose <shiju.jose@...wei.com>
Cc: linux-acpi@...r.kernel.org, linux-pci@...r.kernel.org,
linux-kernel@...r.kernel.org, rjw@...ysocki.net, bp@...en8.de,
james.morse@....com, lenb@...nel.org, tony.luck@...el.com,
dan.carpenter@...cle.com, zhangliguang@...ux.alibaba.com,
wangkefeng.wang@...wei.com, jroedel@...e.de,
yangyicong@...ilicon.com, jonathan.cameron@...wei.com,
tanxiaofei@...wei.com
Subject: Re: [PATCH v9 2/2] PCI: hip: Add handling of HiSilicon HIP PCIe
controller errors
On Mon, Jun 15, 2020 at 11:15:52AM +0100, Shiju Jose wrote:
> From: Yicong Yang <yangyicong@...ilicon.com>
>
> The HiSilicon HIP PCIe controller is capable of handling errors
> on root port and perform port reset separately at each root port.
>
> Add error handling driver for HIP PCIe controller to log
> and report recoverable errors. Perform root port reset and restore
> link status after the recovery.
>
> Following are some of the PCIe controller's recoverable errors
> 1. completion transmission timeout error.
> 2. CRS retry counter over the threshold error.
> 3. ECC 2 bit errors
> 4. AXI bresponse/rresponse errors etc.
>
> The driver placed in the drivers/pci/controller/ because the
> HIP PCIe controller does not use DWC ip.
...
> +#include <linux/acpi.h>
> +#include <acpi/ghes.h>
bits.h ?
> +#include <linux/delay.h>
> +#include <linux/pci.h>
> +#include <linux/platform_device.h>
> +#include <linux/kfifo.h>
> +#include <linux/spinlock.h>
...
> +static guid_t hisi_pcie_sec_type = GUID_INIT(0xB2889FC9, 0xE7D7, 0x4F9D,
> + 0xA8, 0x67, 0xAF, 0x42, 0xE9, 0x8B, 0xE7, 0x72);
Can we have it in more common pattern, i.e.
static guid_t hisi_pcie_sec_type =
GUID_INIT(0xB2889FC9, 0xE7D7, 0x4F9D,
0xA8, 0x67, 0xAF, 0x42, 0xE9, 0x8B, 0xE7, 0x72);
?
...
> +#define HISI_PCIE_CORE_PORT_ID(v) (((v) % 8) << 1)
% -> & ?
...
> +struct hisi_pcie_error_private {
> + struct notifier_block nb;
> + struct platform_device *pdev;
Do you really need platform device? Isn't struct device * enough?
> +};
...
> +static char *hisi_pcie_sub_module_name(u8 id)
> +{
> + switch (id) {
> + case HISI_PCIE_SUB_MODULE_ID_AP: return "AP Layer";
> + case HISI_PCIE_SUB_MODULE_ID_TL: return "TL Layer";
> + case HISI_PCIE_SUB_MODULE_ID_MAC: return "MAC Layer";
> + case HISI_PCIE_SUB_MODULE_ID_DL: return "DL Layer";
> + case HISI_PCIE_SUB_MODULE_ID_SDI: return "SDI Layer";
> + }
match_string() ?
> + return "unknown";
> +}
> +
> +static char *hisi_pcie_error_severity(u8 err_sev)
> +{
> + switch (err_sev) {
> + case HISI_ERR_SEV_RECOVERABLE: return "recoverable";
> + case HISI_ERR_SEV_FATAL: return "fatal";
> + case HISI_ERR_SEV_CORRECTED: return "corrected";
> + case HISI_ERR_SEV_NONE: return "none";
> + }
Ditto?
> + return "unknown";
> +}
...
> + pdev = pci_get_domain_bus_and_slot(domain, busnr, devfn);
> + if (!pdev) {
> + dev_info(device, "Fail to get root port %04x:%02x:%02x.%d device\n",
> + domain, busnr, PCI_SLOT(devfn), PCI_FUNC(devfn));
pci_info() ?
> + return -ENODEV;
> + }
...
> + /*
> + * The initialization time of subordinate devices after
> + * hot reset is no more than 1s, which is required by
> + * the PCI spec v5.0 sec 6.6.1. The time will shorten
> + * if Readiness Notifications mechanisms are used. But
> + * wait 1s here to adapt any conditions.
> + */
> + ssleep(1UL);
It's a huge time out... Can we reduce it somehow?
...
> + for (i = 0; i < HISI_PCIE_ERR_MISC_REGS; i++) {
> + if (edata->val_bits &
> + BIT_ULL(HISI_PCIE_LOCAL_VALID_ERR_MISC + i))
for_each_set_bit() ?
> + dev_info(dev,
> + "ERR_MISC_%d = 0x%x\n", i, edata->err_misc[i]);
> + }
> +
> + /* Recovery for the PCIe controller errors */
> + if (edata->err_severity == HISI_ERR_SEV_RECOVERABLE) {
Perhaps negative conditional?
> + /* try reset PCI port for the error recovery */
> + rc = hisi_pcie_port_do_recovery(pdev, edata->socket_id,
> + HISI_PCIE_PORT_ID(edata->core_id, edata->port_id));
> + if (rc) {
> + dev_info(dev, "fail to do hisi pcie port reset\n");
> + return;
redundant.
> + }
> + }
...
> + const struct hisi_pcie_error_data *error_data =
> + acpi_hest_get_payload(gdata);
One line is better to read.
> + struct platform_device *pdev = priv->pdev;
> + hisi_pcie_handle_error(pdev, error_data);
And how exactly _platform_ device pointer is being used?
...
> + dev_err(&pdev->dev, "%s : ghes_register_event_notifier fail\n",
> + __func__);
Make error message more descriptive that __func__ will not be needed.
...
> + kfree(priv);
Double free?
...
> +static const struct acpi_device_id hisi_pcie_acpi_match[] = {
> + { "HISI0361", 0 },
', 0' part is not necessary to have.
> + { }
> +};
--
With Best Regards,
Andy Shevchenko
Powered by blists - more mailing lists