[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <1a1af428-0794-514d-1d79-52e3796a0707@linux.alibaba.com>
Date: Mon, 29 May 2023 14:33:20 +0800
From: Shuai Xue <xueshuai@...ux.alibaba.com>
To: Baolin Wang <baolin.wang@...ux.alibaba.com>,
chengyou@...ux.alibaba.com, kaishen@...ux.alibaba.com,
helgaas@...nel.org, yangyicong@...wei.com, will@...nel.org,
Jonathan.Cameron@...wei.com, robin.murphy@....com
Cc: linux-kernel@...r.kernel.org, linux-arm-kernel@...ts.infradead.org,
linux-pci@...r.kernel.org, rdunlap@...radead.org,
mark.rutland@....com, zhuo.song@...ux.alibaba.com
Subject: Re: [PATCH v5 3/4] drivers/perf: add DesignWare PCIe PMU driver
On 2023/5/29 14:13, Baolin Wang wrote:
>
>
> On 5/22/2023 11:54 AM, Shuai Xue wrote:
>> This commit adds the PCIe Performance Monitoring Unit (PMU) driver support
>> for T-Head Yitian SoC chip. Yitian is based on the Synopsys PCI Express
>> Core controller IP which provides statistics feature. The PMU is not a PCIe
>> Root Complex integrated End Point(RCiEP) device but only register counters
>> provided by each PCIe Root Port.
>>
>> To facilitate collection of statistics the controller provides the
>> following two features for each Root Port:
>>
>> - Time Based Analysis (RX/TX data throughput and time spent in each
>> low-power LTSSM state)
>> - Event counters (Error and Non-Error for lanes)
>>
>> Note, only one counter for each type and does not overflow interrupt.
>>
>> This driver adds PMU devices for each PCIe Root Port. And the PMU device is
>> named based the BDF of Root Port. For example,
>>
>> 30:03.0 PCI bridge: Device 1ded:8000 (rev 01)
>>
>> the PMU device name for this Root Port is dwc_rootport_3018.
>>
>> Example usage of counting PCIe RX TLP data payload (Units of 16 bytes)::
>>
>> $# perf stat -a -e dwc_rootport_3018/Rx_PCIe_TLP_Data_Payload/
>>
>> average RX bandwidth can be calculated like this:
>>
>> PCIe TX Bandwidth = PCIE_TX_DATA * 16B / Measure_Time_Window
>>
>> Signed-off-by: Shuai Xue <xueshuai@...ux.alibaba.com>
>> Reported-by: kernel test robot <lkp@...el.com>
>> Link: https://lore.kernel.org/oe-kbuild-all/202305170639.XU3djFZX-lkp@intel.com/
>> ---
>
> [snip]
>
>> +static int dwc_pcie_pmu_remove(struct platform_device *pdev)
>> +{
>> + struct dwc_pcie_pmu_priv *priv = platform_get_drvdata(pdev);
>> + struct dwc_pcie_pmu *pcie_pmu;
>> +
>> + list_for_each_entry(pcie_pmu, &priv->pmu_nodes, pmu_node) {
>> + cpuhp_state_remove_instance(dwc_pcie_pmu_hp_state,
>> + &pcie_pmu->cpuhp_node);
>> + perf_pmu_unregister(&pcie_pmu->pmu);
>> + }
>> +
>> + return 0;
>> +}
>> +
>> +static int dwc_pcie_pmu_probe(struct platform_device *pdev)
>> +{
>> + struct dwc_pcie_pmu_priv *priv;
>> +
>> + priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
>> + if (!priv)
>> + return -ENOMEM;
>> +
>> + priv->dev = &pdev->dev;
>> + platform_set_drvdata(pdev, priv);
>> +
>> + /* If one PMU registration fails, remove all. */
>> + if (__dwc_pcie_pmu_probe(priv))
>> + dwc_pcie_pmu_remove(pdev);
>
> In this case, you should return error from __dwc_pcie_pmu_probe() instead of returning 0, to release the requested resources of the PMU deivce.
You are right, will fix it in next version.
>
>> +
>> + return 0;
>> +}
>> +
>> +static void dwc_pcie_pmu_migrate(struct dwc_pcie_pmu *pcie_pmu, unsigned int cpu)
>> +{
>> + /* This PMU does NOT support interrupt, just migrate context. */
>> + perf_pmu_migrate_context(&pcie_pmu->pmu, pcie_pmu->oncpu, cpu);
>> + pcie_pmu->oncpu = cpu;
>> +}
>> +
>> +static int dwc_pcie_pmu_online_cpu(unsigned int cpu, struct hlist_node *cpuhp_node)
>> +{
>> + struct dwc_pcie_pmu *pcie_pmu;
>> + struct pci_dev *pdev;
>> + int node;
>> +
>> + pcie_pmu = hlist_entry_safe(cpuhp_node, struct dwc_pcie_pmu, cpuhp_node);
>> + pdev = pcie_pmu->pdev;
>> + node = dev_to_node(&pdev->dev);
>> +
>> + if (node != NUMA_NO_NODE && cpu_to_node(pcie_pmu->oncpu) != node &&
>> + cpu_to_node(cpu) == node)
>> + dwc_pcie_pmu_migrate(pcie_pmu, cpu);
>> +
>> + return 0;
>> +}
>> +
>> +static int dwc_pcie_pmu_offline_cpu(unsigned int cpu, struct hlist_node *cpuhp_node)
>> +{
>> + struct dwc_pcie_pmu *pcie_pmu;
>> + struct pci_dev *pdev;
>> + int node;
>> + cpumask_t mask;
>> + unsigned int target;
>> +
>> + pcie_pmu = hlist_entry_safe(cpuhp_node, struct dwc_pcie_pmu, cpuhp_node);
>> + if (cpu != pcie_pmu->oncpu)
>> + return 0;
>> +
>> + pdev = pcie_pmu->pdev;
>> + node = dev_to_node(&pdev->dev);
>> + if (cpumask_and(&mask, cpumask_of_node(node), cpu_online_mask) &&
>> + cpumask_andnot(&mask, &mask, cpumask_of(cpu)))
>> + target = cpumask_any(&mask);
>> + else
>> + target = cpumask_any_but(cpu_online_mask, cpu);
>> + if (target < nr_cpu_ids)
>> + dwc_pcie_pmu_migrate(pcie_pmu, target);
>> +
>> + return 0;
>> +}
>> +
>> +static struct platform_driver dwc_pcie_pmu_driver = {
>> + .probe = dwc_pcie_pmu_probe,
>> + .remove = dwc_pcie_pmu_remove,
>> + .driver = {.name = "dwc_pcie_pmu",},
>> +};
>> +
>> +static int __init dwc_pcie_pmu_init(void)
>> +{
>> + int ret;
>> +
>> + ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
>> + "perf/dwc_pcie_pmu:online",
>> + dwc_pcie_pmu_online_cpu,
>> + dwc_pcie_pmu_offline_cpu);
>> + if (ret < 0)
>> + return ret;
>> +
>> + dwc_pcie_pmu_hp_state = ret;
>> +
>> + ret = platform_driver_register(&dwc_pcie_pmu_driver);
>> + if (ret) {
>> + cpuhp_remove_multi_state(dwc_pcie_pmu_hp_state);
>> + return ret;
>> + }
>> +
>> + dwc_pcie_pmu_dev = platform_device_register_simple(
>> + "dwc_pcie_pmu", PLATFORM_DEVID_NONE, NULL, 0);
>> + if (IS_ERR(dwc_pcie_pmu_dev)) {
>> + platform_driver_unregister(&dwc_pcie_pmu_driver);
>> + return PTR_ERR(dwc_pcie_pmu_dev);
>> + }
>> +
>> + return 0;
>> +}
>> +
>> +static void __exit dwc_pcie_pmu_exit(void)
>> +{
>> + platform_device_unregister(dwc_pcie_pmu_dev);
>> + platform_driver_unregister(&dwc_pcie_pmu_driver);
>
> You should also call 'cpuhp_remove_multi_state()' when exiting the driver.
Good catch, will add it in next version.
>
> With above issues fixed, you can add:
> Reviewed-by: Baolin Wang <baolin.wang@...ux.alibaba.com>
Thank you :)
Best Regards,
Shuai
Powered by blists - more mailing lists