[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <32626689-c8b1-9bd-b00-5285c633bfbc@os.amperecomputing.com>
Date: Wed, 15 Nov 2023 19:50:04 -0800 (PST)
From: Ilkka Koskinen <ilkka@...amperecomputing.com>
To: Shuai Xue <xueshuai@...ux.alibaba.com>
cc: kaishen@...ux.alibaba.com, helgaas@...nel.org,
yangyicong@...wei.com, will@...nel.org,
Jonathan.Cameron@...wei.com, baolin.wang@...ux.alibaba.com,
robin.murphy@....com, chengyou@...ux.alibaba.com,
linux-kernel@...r.kernel.org, linux-arm-kernel@...ts.infradead.org,
linux-pci@...r.kernel.org, rdunlap@...radead.org,
mark.rutland@....com, zhuo.song@...ux.alibaba.com,
renyu.zj@...ux.alibaba.com
Subject: Re: [PATCH v10 4/5] drivers/perf: add DesignWare PCIe PMU driver
Hi Shuai,
I have a few comments below.
On Sat, 4 Nov 2023, Shuai Xue wrote:
> This commit adds the PCIe Performance Monitoring Unit (PMU) driver support
> for T-Head Yitian SoC chip. Yitian is based on the Synopsys PCI Express
> Core controller IP which provides statistics feature. The PMU is a PCIe
> configuration space register block provided by each PCIe Root Port in a
> Vendor-Specific Extended Capability named RAS D.E.S (Debug, Error
> injection, and Statistics).
>
> To facilitate collection of statistics the controller provides the
> following two features for each Root Port:
>
> - one 64-bit counter for Time Based Analysis (RX/TX data throughput and
> time spent in each low-power LTSSM state) and
> - one 32-bit counter for Event Counting (error and non-error events for
> a specified lane)
>
> Note: There is no interrupt for counter overflow.
>
> This driver adds PMU devices for each PCIe Root Port. And the PMU device is
> named based on the BDF of the Root Port. For example,
>
> 30:03.0 PCI bridge: Device 1ded:8000 (rev 01)
>
> the PMU device name for this Root Port is dwc_rootport_3018.
>
> Example usage of counting PCIe RX TLP data payload (Units of bytes)::
>
> $# perf stat -a -e dwc_rootport_3018/Rx_PCIe_TLP_Data_Payload/
>
> average RX bandwidth can be calculated like this:
>
> PCIe RX Bandwidth = Rx_PCIe_TLP_Data_Payload / Measure_Time_Window
>
> Signed-off-by: Shuai Xue <xueshuai@...ux.alibaba.com>
> Reviewed-by: Baolin Wang <baolin.wang@...ux.alibaba.com>
> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@...wei.com>
> Reviewed-by: Yicong Yang <yangyicong@...ilicon.com>
> ---
> drivers/perf/Kconfig | 7 +
> drivers/perf/Makefile | 1 +
> drivers/perf/dwc_pcie_pmu.c | 798 ++++++++++++++++++++++++++++++++++++
> 3 files changed, 806 insertions(+)
> create mode 100644 drivers/perf/dwc_pcie_pmu.c
...
> diff --git a/drivers/perf/dwc_pcie_pmu.c b/drivers/perf/dwc_pcie_pmu.c
> new file mode 100644
> index 000000000000..9485c41de322
> --- /dev/null
> +++ b/drivers/perf/dwc_pcie_pmu.c
> @@ -0,0 +1,798 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Synopsys DesignWare PCIe PMU driver
> + *
> + * Copyright (C) 2021-2023 Alibaba Inc.
> + */
> +
...
> +static void dwc_pcie_pmu_time_based_event_enable(struct dwc_pcie_pmu *pcie_pmu,
> + bool enable)
> +{
> + struct pci_dev *pdev = pcie_pmu->pdev;
> + u16 ras_des_offset = pcie_pmu->ras_des_offset;
> +
> + if (enable)
> + pci_clear_and_set_dword(pdev,
> + ras_des_offset + DWC_PCIE_TIME_BASED_ANAL_CTL,
> + DWC_PCIE_TIME_BASED_TIMER_START, 0x1);
> + else
> + pci_clear_and_set_dword(pdev,
> + ras_des_offset + DWC_PCIE_TIME_BASED_ANAL_CTL,
> + DWC_PCIE_TIME_BASED_TIMER_START, 0x0);
It's a matter of taste, but you could simply do:
pci_clear_and_set_dword(pdev,
ras_des_offset + DWC_PCIE_TIME_BASED_ANAL_CTL,
DWC_PCIE_TIME_BASED_TIMER_START, enable);
However, I'm fine with either way.
> +static u64 dwc_pcie_pmu_read_lane_event_counter(struct perf_event *event)
> +{
> + struct dwc_pcie_pmu *pcie_pmu = to_dwc_pcie_pmu(event->pmu);
> + struct pci_dev *pdev = pcie_pmu->pdev;
> + u16 ras_des_offset = pcie_pmu->ras_des_offset;
> + u32 val;
> +
> + pci_read_config_dword(pdev, ras_des_offset + DWC_PCIE_EVENT_CNT_DATA, &val);
> +
> + return val;
> +}
...
> +static int dwc_pcie_register_dev(struct pci_dev *pdev)
> +{
> + struct platform_device *plat_dev;
> + struct dwc_pcie_dev_info *dev_info;
> + int ret;
> + u32 bdf;
> +
> + bdf = PCI_DEVID(pdev->bus->number, pdev->devfn);
> + plat_dev = platform_device_register_data(NULL, "dwc_pcie_pmu", bdf,
> + pdev, sizeof(*pdev));
> + ret = PTR_ERR_OR_ZERO(plat_dev);
> + if (ret)
> + return ret;
platform_device_register_data() doesn't return a null pointer, and you
don't really need 'ret'. You could do something like this instead:
if (IS_ERR(plat_dev))
return PTR_ERR(plat_dev);
> + dev_info = kzalloc(sizeof(*dev_info), GFP_KERNEL);
> + if (!dev_info)
> + return -ENOMEM;
> +
> + /* Cache platform device to handle pci device hotplug */
> + dev_info->plat_dev = plat_dev;
> + dev_info->pdev = pdev;
> + list_add(&dev_info->dev_node, &dwc_pcie_dev_info_head);
> +
> + return 0;
> +}
> +
> +static int dwc_pcie_pmu_notifier(struct notifier_block *nb,
> + unsigned long action, void *data)
> +{
> + struct device *dev = data;
> + struct pci_dev *pdev = to_pci_dev(dev);
> + struct dwc_pcie_dev_info *dev_info;
> +
> + switch (action) {
> + case BUS_NOTIFY_ADD_DEVICE:
> + if (!dwc_pcie_match_des_cap(pdev))
> + return NOTIFY_DONE;
> + if (dwc_pcie_register_dev(pdev))
> + return NOTIFY_BAD;
> + break;
> + case BUS_NOTIFY_DEL_DEVICE:
> + dev_info = dwc_pcie_find_dev_info(pdev);
> + if (!dev_info)
> + return NOTIFY_DONE;
> + dwc_pcie_unregister_dev(dev_info);
> + break;
> + }
> +
> + return NOTIFY_OK;
> +}
> +
> +static struct notifier_block dwc_pcie_pmu_nb = {
> + .notifier_call = dwc_pcie_pmu_notifier,
> +};
> +
> +static int dwc_pcie_pmu_probe(struct platform_device *plat_dev)
> +{
> + struct pci_dev *pdev = plat_dev->dev.platform_data;
> + struct dwc_pcie_pmu *pcie_pmu;
> + char *name;
> + u32 bdf, val;
> + u16 vsec;
> + int ret;
> +
> + vsec = pci_find_vsec_capability(pdev, PCI_VENDOR_ID_ALIBABA,
> + DWC_PCIE_VSEC_RAS_DES_ID);
You nicely switched to using a vendor list in this version, but here the
driver still tries to find the Alibaba-specific capability. I guess you could
search again using the vendor list. The other option would be to make
dwc_pcie_match_des_cap() return the vendor ID and pass it to
dwc_pcie_register_dev(), which would add it to the device's platform data
along with the pointer to the PCI device.
Cheers, Ilkka
> + pci_read_config_dword(pdev, vsec + PCI_VNDR_HEADER, &val);
> + bdf = PCI_DEVID(pdev->bus->number, pdev->devfn);
> + name = devm_kasprintf(&plat_dev->dev, GFP_KERNEL, "dwc_rootport_%x", bdf);
> + if (!name)
> + return -ENOMEM;
> +
> + pcie_pmu = devm_kzalloc(&plat_dev->dev, sizeof(*pcie_pmu), GFP_KERNEL);
> + if (!pcie_pmu)
> + return -ENOMEM;
> +
> + pcie_pmu->pdev = pdev;
> + pcie_pmu->ras_des_offset = vsec;
> + pcie_pmu->nr_lanes = pcie_get_width_cap(pdev);
> + pcie_pmu->on_cpu = -1;
> + pcie_pmu->pmu = (struct pmu){
> + .name = name,
> + .parent = &pdev->dev,
> + .module = THIS_MODULE,
> + .attr_groups = dwc_pcie_attr_groups,
> + .capabilities = PERF_PMU_CAP_NO_EXCLUDE,
> + .task_ctx_nr = perf_invalid_context,
> + .event_init = dwc_pcie_pmu_event_init,
> + .add = dwc_pcie_pmu_event_add,
> + .del = dwc_pcie_pmu_event_del,
> + .start = dwc_pcie_pmu_event_start,
> + .stop = dwc_pcie_pmu_event_stop,
> + .read = dwc_pcie_pmu_event_update,
> + };
> +
> + /* Add this instance to the list used by the offline callback */
> + ret = cpuhp_state_add_instance(dwc_pcie_pmu_hp_state,
> + &pcie_pmu->cpuhp_node);
> + if (ret) {
> + pci_err(pdev, "Error %d registering hotplug @%x\n", ret, bdf);
> + return ret;
> + }
> +
> + /* Unwind when platform driver removes */
> + ret = devm_add_action_or_reset(&plat_dev->dev,
> + dwc_pcie_pmu_remove_cpuhp_instance,
> + &pcie_pmu->cpuhp_node);
> + if (ret)
> + return ret;
> +
> + ret = perf_pmu_register(&pcie_pmu->pmu, name, -1);
> + if (ret) {
> + pci_err(pdev, "Error %d registering PMU @%x\n", ret, bdf);
> + return ret;
> + }
> + ret = devm_add_action_or_reset(&plat_dev->dev, dwc_pcie_unregister_pmu,
> + pcie_pmu);
> + if (ret)
> + return ret;
> +
> + return 0;
> +}
Powered by blists - more mailing lists