[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <Zr46t-JO7KPXCrn2@J2N7QTR9R3.cambridge.arm.com>
Date: Thu, 15 Aug 2024 18:28:23 +0100
From: Mark Rutland <mark.rutland@....com>
To: Gowthami Thiagarajan <gthiagarajan@...vell.com>
Cc: will@...nel.org, linux-arm-kernel@...ts.infradead.org,
linux-kernel@...r.kernel.org, sgoutham@...vell.com,
lcherian@...vell.com, gcherian@...vell.com
Subject: Re: [PATCH v6] perf/marvell: Marvell PEM performance monitor support
On Thu, Aug 01, 2024 at 07:59:17PM +0530, Gowthami Thiagarajan wrote:
> PCI Express Interface PMU includes various performance counters
> to monitor the data that is transmitted over the PCIe link. The
> counters track various inbound and outbound transactions which
> includes separate counters for posted/non-posted/completion TLPs.
> Also, inbound and outbound memory read requests along with their
> latencies can also be monitored. Address Translation Services(ATS)events
> such as ATS Translation, ATS Page Request, ATS Invalidation along with
> their corresponding latencies are also supported.
>
> The performance counters are 64 bits wide.
>
> For instance,
> perf stat -e ib_tlp_pr <workload>
> tracks the inbound posted TLPs for the workload.
>
> Signed-off-by: Gowthami Thiagarajan <gthiagarajan@...vell.com>
> ---
> +static int pem_perf_event_init(struct perf_event *event)
> +{
> + struct pem_pmu *pmu = to_pem_pmu(event->pmu);
> + struct hw_perf_event *hwc = &event->hw;
> +
> + if (event->attr.type != event->pmu->type)
> + return -ENOENT;
> +
> + if (is_sampling_event(event) ||
> + event->attach_state & PERF_ATTACH_TASK) {
> + return -EOPNOTSUPP;
> + }
> +
> + if (event->cpu < 0)
> + return -EOPNOTSUPP;
> +
> + /* We must NOT create groups containing mixed PMUs */
> + if (event->group_leader->pmu != event->pmu &&
> + !is_software_event(event->group_leader))
> + return -EINVAL;
> +
This should check the entire group and should check for cross-event
conflicts or where the entire group is too large to fit into the
(maximum possible) potential set of counters.
Below in pem_perf_event_start() you reset the HW counter when starting
an event, so multiple events *must not* share the same HW counter; they
will clobber each other and events will be lost, leading to incorrect
results.
Either you need to track counter allocations, or you need to handle the
counters as free-running and *never* reset/reprogram them.
I also don't see any logic to start/stop the entire PMU, which really
means you don't support group semantics at all, and should reject groups
with more than 1 non-SW event, since the events are counting for
different times anyway.
> + /*
> + * Set ownership of event to one CPU, same event can not be observed
> + * on multiple cpus at same time.
> + */
> + event->cpu = pmu->cpu;
> + hwc->idx = -1;
> + return 0;
> +}
> +
> +static void pem_perf_counter_reset(struct pem_pmu *pmu,
> + struct perf_event *event, int eventid)
> +{
> + writeq_relaxed(0x0, pmu->base + eventid_to_offset(eventid));
> +}
> +
> +static u64 pem_perf_read_counter(struct pem_pmu *pmu,
> + struct perf_event *event, int eventid)
> +{
> + return readq_relaxed(pmu->base + eventid_to_offset(eventid));
> +}
> +
> +static void pem_perf_event_update(struct perf_event *event)
> +{
> + struct pem_pmu *pmu = to_pem_pmu(event->pmu);
> + struct hw_perf_event *hwc = &event->hw;
> + u64 prev_count, new_count;
> +
> + do {
> + prev_count = local64_read(&hwc->prev_count);
> + new_count = pem_perf_read_counter(pmu, event, hwc->idx);
> + } while (local64_xchg(&hwc->prev_count, new_count) != prev_count);
> +
> + local64_add((new_count - prev_count), &event->count);
> +}
> +
> +static void pem_perf_event_start(struct perf_event *event, int flags)
> +{
> + struct pem_pmu *pmu = to_pem_pmu(event->pmu);
> + struct hw_perf_event *hwc = &event->hw;
> + int eventid = hwc->idx;
> +
> + local64_set(&hwc->prev_count, 0);
> +
> + pem_perf_counter_reset(pmu, event, eventid);
> +
> + hwc->state = 0;
> +}
> +
> +static int pem_perf_event_add(struct perf_event *event, int flags)
> +{
> + struct hw_perf_event *hwc = &event->hw;
> +
> + hwc->idx = event->attr.config;
As above, this will go wrong when two events have the same
event->attr.config, regardless of whther those events are in the same
group or not.
Either
(a) Track which counters are currently allocated, and reject an event
targetting an already-allocated counter here.
(b) Treat all HW counters as free-running and never reset/reprogam them.
In pem_perf_event_start() you'd need to read the *current* value of
the HW counter and set this in &hwc->prev_count.
> + if (hwc->idx >= PEM_EVENTIDS_MAX)
> + return -EINVAL;
This should have been rejected at event_init() time. If that's violated
here it should result in a warning.
> + hwc->state |= PERF_HES_STOPPED;
> +
> + if (flags & PERF_EF_START)
> + pem_perf_event_start(event, flags);
> +
> + return 0;
> +}
[...]
> +static int pem_perf_probe(struct platform_device *pdev)
> +{
> + struct pem_pmu *pem_pmu;
> + struct resource *res;
> + void __iomem *base;
> + char *name;
> + int ret;
> +
> + pem_pmu = devm_kzalloc(&pdev->dev, sizeof(*pem_pmu), GFP_KERNEL);
> + if (!pem_pmu)
> + return -ENOMEM;
> +
> + pem_pmu->dev = &pdev->dev;
> + platform_set_drvdata(pdev, pem_pmu);
> +
> + base = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
> + if (IS_ERR(base))
> + return PTR_ERR(base);
> +
> + pem_pmu->base = base;
> +
> + pem_pmu->pmu = (struct pmu) {
> + .module = THIS_MODULE,
> + .capabilities = PERF_PMU_CAP_NO_EXCLUDE,
> + .task_ctx_nr = perf_invalid_context,
> + .attr_groups = pem_perf_attr_groups,
> + .event_init = pem_perf_event_init,
> + .add = pem_perf_event_add,
> + .del = pem_perf_event_del,
> + .start = pem_perf_event_start,
> + .stop = pem_perf_event_stop,
> + .read = pem_perf_event_update,
> + };
> +
> + /* Choose this cpu to collect perf data */
> + pem_pmu->cpu = raw_smp_processor_id();
> +
> + name = devm_kasprintf(pem_pmu->dev, GFP_KERNEL, "mrvl_pcie_rc_pmu_%llx",
> + res->start);
> + if (!name)
> + return -ENOMEM;
> +
> + cpuhp_state_add_instance_nocalls
> + (CPUHP_AP_PERF_ARM_MARVELL_PEM_ONLINE,
> + &pem_pmu->node);
Nit: weird formatting: the opening '(' for a function call should not be
on a new line.
Mark.
Powered by blists - more mailing lists