[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <CAEEQ3wmjMJxbnb1dS2g-wojvB9G2w3N7UK9wqFtw77eWE-tEKQ@mail.gmail.com>
Date: Thu, 28 Aug 2025 17:56:55 +0800
From: yunhui cui <cuiyunhui@...edance.com>
To: Atish Patra <atishp@...osinc.com>
Cc: Paul Walmsley <paul.walmsley@...ive.com>, Palmer Dabbelt <palmer@...belt.com>,
Rob Herring <robh@...nel.org>, Krzysztof Kozlowski <krzk+dt@...nel.org>, Conor Dooley <conor+dt@...nel.org>,
Anup Patel <anup@...infault.org>, Atish Patra <atishp@...shpatra.org>,
Will Deacon <will@...nel.org>, Mark Rutland <mark.rutland@....com>,
Peter Zijlstra <peterz@...radead.org>, Ingo Molnar <mingo@...hat.com>,
Arnaldo Carvalho de Melo <acme@...nel.org>, Namhyung Kim <namhyung@...nel.org>,
Alexander Shishkin <alexander.shishkin@...ux.intel.com>, Jiri Olsa <jolsa@...nel.org>,
Ian Rogers <irogers@...gle.com>, Adrian Hunter <adrian.hunter@...el.com>, weilin.wang@...el.com,
linux-riscv@...ts.infradead.org, linux-kernel@...r.kernel.org,
Conor Dooley <conor@...nel.org>, devicetree@...r.kernel.org, kvm@...r.kernel.org,
kvm-riscv@...ts.infradead.org, linux-arm-kernel@...ts.infradead.org,
linux-perf-users@...r.kernel.org
Subject: Re: [External] [PATCH v5 14/21] RISC-V: perf: Implement supervisor
counter delegation support
Hi Atish,
On Fri, Mar 28, 2025 at 3:43 AM Atish Patra <atishp@...osinc.com> wrote:
>
> There are few new RISC-V ISA exensions (ssccfg, sscsrind, smcntrpmf) which
> allows the hpmcounter/hpmevents to be programmed directly from S-mode. The
> implementation detects the ISA extension at runtime and uses them if
> available instead of SBI PMU extension. SBI PMU extension will still be
> used for firmware counters if the user requests it.
>
> The current linux driver relies on event encoding defined by SBI PMU
> specification for standard perf events. However, there are no standard
> event encoding available in the ISA. In the future, we may want to
> decouple the counter delegation and SBI PMU completely. In that case,
> counter delegation supported platforms must rely on the event encoding
> defined in the perf json file or in the pmu driver.
>
> For firmware events, it will continue to use the SBI PMU encoding as
> one can not support firmware event without SBI PMU.
>
> Signed-off-by: Atish Patra <atishp@...osinc.com>
> ---
> arch/riscv/include/asm/csr.h | 1 +
> drivers/perf/riscv_pmu_dev.c | 561 +++++++++++++++++++++++++++++++++--------
> include/linux/perf/riscv_pmu.h | 3 +
> 3 files changed, 462 insertions(+), 103 deletions(-)
>
> diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h
> index 3d2d4f886c77..8b2f5ae1d60e 100644
> --- a/arch/riscv/include/asm/csr.h
> +++ b/arch/riscv/include/asm/csr.h
> @@ -255,6 +255,7 @@
> #endif
>
> #define SISELECT_SSCCFG_BASE 0x40
> +#define HPMEVENT_MASK GENMASK_ULL(63, 56)
>
> /* mseccfg bits */
> #define MSECCFG_PMM ENVCFG_PMM
> diff --git a/drivers/perf/riscv_pmu_dev.c b/drivers/perf/riscv_pmu_dev.c
> index 6f64404a6e3d..7c4a1ef15866 100644
> --- a/drivers/perf/riscv_pmu_dev.c
> +++ b/drivers/perf/riscv_pmu_dev.c
> @@ -27,6 +27,8 @@
> #include <asm/cpufeature.h>
> #include <asm/vendor_extensions.h>
> #include <asm/vendor_extensions/andes.h>
> +#include <asm/hwcap.h>
> +#include <asm/csr_ind.h>
>
> #define ALT_SBI_PMU_OVERFLOW(__ovl) \
> asm volatile(ALTERNATIVE_2( \
> @@ -59,14 +61,31 @@ asm volatile(ALTERNATIVE( \
> #define PERF_EVENT_FLAG_USER_ACCESS BIT(SYSCTL_USER_ACCESS)
> #define PERF_EVENT_FLAG_LEGACY BIT(SYSCTL_LEGACY)
>
> -PMU_FORMAT_ATTR(event, "config:0-47");
> +#define RVPMU_SBI_PMU_FORMAT_ATTR "config:0-47"
> +#define RVPMU_CDELEG_PMU_FORMAT_ATTR "config:0-55"
> +
> +static ssize_t __maybe_unused rvpmu_format_show(struct device *dev, struct device_attribute *attr,
> + char *buf);
> +
> +#define RVPMU_ATTR_ENTRY(_name, _func, _config) ( \
> + &((struct dev_ext_attribute[]) { \
> + { __ATTR(_name, 0444, _func, NULL), (void *)_config } \
> + })[0].attr.attr)
> +
> +#define RVPMU_FORMAT_ATTR_ENTRY(_name, _config) \
> + RVPMU_ATTR_ENTRY(_name, rvpmu_format_show, (char *)_config)
> +
> PMU_FORMAT_ATTR(firmware, "config:62-63");
>
> static bool sbi_v2_available;
> static DEFINE_STATIC_KEY_FALSE(sbi_pmu_snapshot_available);
> #define sbi_pmu_snapshot_available() \
> static_branch_unlikely(&sbi_pmu_snapshot_available)
> +
> static DEFINE_STATIC_KEY_FALSE(riscv_pmu_sbi_available);
> +#define riscv_pmu_sbi_available() \
> + static_branch_likely(&riscv_pmu_sbi_available)
> +
> static DEFINE_STATIC_KEY_FALSE(riscv_pmu_cdeleg_available);
>
> /* Avoid unnecessary code patching in the one time booting path*/
> @@ -81,19 +100,35 @@ static DEFINE_STATIC_KEY_FALSE(riscv_pmu_cdeleg_available);
> #define riscv_pmu_sbi_available() \
> static_branch_likely(&riscv_pmu_sbi_available)
>
> -static struct attribute *riscv_arch_formats_attr[] = {
> - &format_attr_event.attr,
> +static struct attribute *riscv_sbi_pmu_formats_attr[] = {
> + RVPMU_FORMAT_ATTR_ENTRY(event, RVPMU_SBI_PMU_FORMAT_ATTR),
> + &format_attr_firmware.attr,
> + NULL,
> +};
> +
> +static struct attribute_group riscv_sbi_pmu_format_group = {
> + .name = "format",
> + .attrs = riscv_sbi_pmu_formats_attr,
> +};
> +
> +static const struct attribute_group *riscv_sbi_pmu_attr_groups[] = {
> + &riscv_sbi_pmu_format_group,
> + NULL,
> +};
> +
> +static struct attribute *riscv_cdeleg_pmu_formats_attr[] = {
> + RVPMU_FORMAT_ATTR_ENTRY(event, RVPMU_CDELEG_PMU_FORMAT_ATTR),
> &format_attr_firmware.attr,
> NULL,
> };
>
> -static struct attribute_group riscv_pmu_format_group = {
> +static struct attribute_group riscv_cdeleg_pmu_format_group = {
> .name = "format",
> - .attrs = riscv_arch_formats_attr,
> + .attrs = riscv_cdeleg_pmu_formats_attr,
> };
>
> -static const struct attribute_group *riscv_pmu_attr_groups[] = {
> - &riscv_pmu_format_group,
> +static const struct attribute_group *riscv_cdeleg_pmu_attr_groups[] = {
> + &riscv_cdeleg_pmu_format_group,
> NULL,
> };
>
> @@ -395,6 +430,14 @@ static void rvpmu_sbi_check_std_events(struct work_struct *work)
>
> static DECLARE_WORK(check_std_events_work, rvpmu_sbi_check_std_events);
>
> +static ssize_t rvpmu_format_show(struct device *dev,
> + struct device_attribute *attr, char *buf)
> +{
> + struct dev_ext_attribute *eattr = container_of(attr,
> + struct dev_ext_attribute, attr);
> + return sysfs_emit(buf, "%s\n", (char *)eattr->var);
> +}
> +
> static int rvpmu_ctr_get_width(int idx)
> {
> return pmu_ctr_list[idx].width;
> @@ -447,6 +490,38 @@ static uint8_t rvpmu_csr_index(struct perf_event *event)
> return pmu_ctr_list[event->hw.idx].csr - CSR_CYCLE;
> }
>
> +static uint64_t get_deleg_priv_filter_bits(struct perf_event *event)
> +{
> + u64 priv_filter_bits = 0;
> + bool guest_events = false;
> +
> + if (event->attr.config1 & RISCV_PMU_CONFIG1_GUEST_EVENTS)
> + guest_events = true;
> + if (event->attr.exclude_kernel)
> + priv_filter_bits |= guest_events ? HPMEVENT_VSINH : HPMEVENT_SINH;
> + if (event->attr.exclude_user)
> + priv_filter_bits |= guest_events ? HPMEVENT_VUINH : HPMEVENT_UINH;
> + if (guest_events && event->attr.exclude_hv)
> + priv_filter_bits |= HPMEVENT_SINH;
> + if (event->attr.exclude_host)
> + priv_filter_bits |= HPMEVENT_UINH | HPMEVENT_SINH;
> + if (event->attr.exclude_guest)
> + priv_filter_bits |= HPMEVENT_VSINH | HPMEVENT_VUINH;
> +
> + return priv_filter_bits;
> +}
> +
> +static bool pmu_sbi_is_fw_event(struct perf_event *event)
> +{
> + u32 type = event->attr.type;
> + u64 config = event->attr.config;
> +
> + if (type == PERF_TYPE_RAW && ((config >> 63) == 1))
> + return true;
> + else
> + return false;
> +}
> +
> static unsigned long rvpmu_sbi_get_filter_flags(struct perf_event *event)
> {
> unsigned long cflags = 0;
> @@ -475,7 +550,8 @@ static int rvpmu_sbi_ctr_get_idx(struct perf_event *event)
> struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events);
> struct sbiret ret;
> int idx;
> - uint64_t cbase = 0, cmask = rvpmu->cmask;
> + u64 cbase = 0;
> + unsigned long ctr_mask = rvpmu->cmask;
> unsigned long cflags = 0;
>
> cflags = rvpmu_sbi_get_filter_flags(event);
> @@ -488,21 +564,23 @@ static int rvpmu_sbi_ctr_get_idx(struct perf_event *event)
> if ((hwc->flags & PERF_EVENT_FLAG_LEGACY) && (event->attr.type == PERF_TYPE_HARDWARE)) {
> if (event->attr.config == PERF_COUNT_HW_CPU_CYCLES) {
> cflags |= SBI_PMU_CFG_FLAG_SKIP_MATCH;
> - cmask = 1;
> + ctr_mask = 1;
> } else if (event->attr.config == PERF_COUNT_HW_INSTRUCTIONS) {
> cflags |= SBI_PMU_CFG_FLAG_SKIP_MATCH;
> - cmask = BIT(CSR_INSTRET - CSR_CYCLE);
> + ctr_mask = BIT(CSR_INSTRET - CSR_CYCLE);
> }
> + } else if (pmu_sbi_is_fw_event(event)) {
> + ctr_mask = firmware_cmask;
> }
>
> /* retrieve the available counter index */
> #if defined(CONFIG_32BIT)
> ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_CFG_MATCH, cbase,
> - cmask, cflags, hwc->event_base, hwc->config,
> + ctr_mask, cflags, hwc->event_base, hwc->config,
> hwc->config >> 32);
> #else
> ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_CFG_MATCH, cbase,
> - cmask, cflags, hwc->event_base, hwc->config, 0);
> + ctr_mask, cflags, hwc->event_base, hwc->config, 0);
> #endif
> if (ret.error) {
> pr_debug("Not able to find a counter for event %lx config %llx\n",
> @@ -511,7 +589,7 @@ static int rvpmu_sbi_ctr_get_idx(struct perf_event *event)
> }
>
> idx = ret.value;
> - if (!test_bit(idx, &rvpmu->cmask) || !pmu_ctr_list[idx].value)
> + if (!test_bit(idx, &ctr_mask) || !pmu_ctr_list[idx].value)
> return -ENOENT;
>
> /* Additional sanity check for the counter id */
> @@ -561,17 +639,6 @@ static int sbi_pmu_event_find_cache(u64 config)
> return ret;
> }
>
> -static bool pmu_sbi_is_fw_event(struct perf_event *event)
> -{
> - u32 type = event->attr.type;
> - u64 config = event->attr.config;
> -
> - if ((type == PERF_TYPE_RAW) && ((config >> 63) == 1))
> - return true;
> - else
> - return false;
> -}
> -
> static int rvpmu_sbi_event_map(struct perf_event *event, u64 *econfig)
> {
> u32 type = event->attr.type;
> @@ -602,7 +669,6 @@ static int rvpmu_sbi_event_map(struct perf_event *event, u64 *econfig)
> * 10 - SBI firmware events
> * 11 - Risc-V platform specific firmware event
> */
> -
> switch (config >> 62) {
> case 0:
> /* Return error any bits [48-63] is set as it is not allowed by the spec */
> @@ -634,6 +700,84 @@ static int rvpmu_sbi_event_map(struct perf_event *event, u64 *econfig)
> return ret;
> }
>
> +static int cdeleg_pmu_event_find_cache(u64 config, u64 *eventid, uint32_t *counter_mask)
> +{
> + unsigned int cache_type, cache_op, cache_result;
> +
> + if (!current_pmu_cache_event_map)
> + return -ENOENT;
> +
> + cache_type = (config >> 0) & 0xff;
> + if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
> + return -EINVAL;
> +
> + cache_op = (config >> 8) & 0xff;
> + if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
> + return -EINVAL;
> +
> + cache_result = (config >> 16) & 0xff;
> + if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
> + return -EINVAL;
> +
> + if (eventid)
> + *eventid = current_pmu_cache_event_map[cache_type][cache_op]
> + [cache_result].event_id;
> + if (counter_mask)
> + *counter_mask = current_pmu_cache_event_map[cache_type][cache_op]
> + [cache_result].counter_mask;
> +
> + return 0;
> +}
> +
> +static int rvpmu_cdeleg_event_map(struct perf_event *event, u64 *econfig)
> +{
> + u32 type = event->attr.type;
> + u64 config = event->attr.config;
> + int ret = 0;
> +
> + /*
> + * There are two ways standard perf events can be mapped to platform specific
> + * encoding.
> + * 1. The vendor may specify the encodings in the driver.
> + * 2. The Perf tool for RISC-V may remap the standard perf event to platform
> + * specific encoding.
> + *
> + * As RISC-V ISA doesn't define any standard event encoding. Thus, perf tool allows
> + * vendor to define it via json file. The encoding defined in the json will override
> + * the perf legacy encoding. However, some user may want to run performance
> + * monitoring without perf tool as well. That's why, vendors may specify the event
> + * encoding in the driver as well if they want to support that use case too.
> + * If an encoding is defined in the json, it will be encoded as a raw event.
> + */
> +
> + switch (type) {
> + case PERF_TYPE_HARDWARE:
> + if (config >= PERF_COUNT_HW_MAX)
> + return -EINVAL;
> + if (!current_pmu_hw_event_map)
> + return -ENOENT;
> +
> + *econfig = current_pmu_hw_event_map[config].event_id;
> + if (*econfig == HW_OP_UNSUPPORTED)
> + ret = -ENOENT;
> + break;
> + case PERF_TYPE_HW_CACHE:
> + ret = cdeleg_pmu_event_find_cache(config, econfig, NULL);
> + if (*econfig == HW_OP_UNSUPPORTED)
> + ret = -ENOENT;
> + break;
> + case PERF_TYPE_RAW:
> + *econfig = config & RISCV_PMU_DELEG_RAW_EVENT_MASK;
> + break;
> + default:
> + ret = -ENOENT;
> + break;
> + }
> +
> + /* event_base is not used for counter delegation */
> + return ret;
> +}
> +
> static void pmu_sbi_snapshot_free(struct riscv_pmu *pmu)
> {
> int cpu;
> @@ -717,7 +861,7 @@ static int pmu_sbi_snapshot_setup(struct riscv_pmu *pmu, int cpu)
> return 0;
> }
>
> -static u64 rvpmu_sbi_ctr_read(struct perf_event *event)
> +static u64 rvpmu_ctr_read(struct perf_event *event)
> {
> struct hw_perf_event *hwc = &event->hw;
> int idx = hwc->idx;
> @@ -794,10 +938,6 @@ static void rvpmu_sbi_ctr_start(struct perf_event *event, u64 ival)
> if (ret.error && (ret.error != SBI_ERR_ALREADY_STARTED))
> pr_err("Starting counter idx %d failed with error %d\n",
> hwc->idx, sbi_err_map_linux_errno(ret.error));
> -
> - if ((hwc->flags & PERF_EVENT_FLAG_USER_ACCESS) &&
> - (hwc->flags & PERF_EVENT_FLAG_USER_READ_CNT))
> - rvpmu_set_scounteren((void *)event);
> }
>
> static void rvpmu_sbi_ctr_stop(struct perf_event *event, unsigned long flag)
> @@ -808,10 +948,6 @@ static void rvpmu_sbi_ctr_stop(struct perf_event *event, unsigned long flag)
> struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events);
> struct riscv_pmu_snapshot_data *sdata = cpu_hw_evt->snapshot_addr;
>
> - if ((hwc->flags & PERF_EVENT_FLAG_USER_ACCESS) &&
> - (hwc->flags & PERF_EVENT_FLAG_USER_READ_CNT))
> - rvpmu_reset_scounteren((void *)event);
> -
> if (sbi_pmu_snapshot_available())
> flag |= SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT;
>
> @@ -847,12 +983,6 @@ static int rvpmu_sbi_find_num_ctrs(void)
> return sbi_err_map_linux_errno(ret.error);
> }
>
> -static u32 rvpmu_deleg_find_ctrs(void)
> -{
> - /* TODO */
> - return 0;
> -}
> -
> static int rvpmu_sbi_get_ctrinfo(u32 nsbi_ctr, u32 *num_fw_ctr, u32 *num_hw_ctr)
> {
> struct sbiret ret;
> @@ -930,53 +1060,75 @@ static inline void rvpmu_sbi_stop_hw_ctrs(struct riscv_pmu *pmu)
> }
> }
>
> -/*
> - * This function starts all the used counters in two step approach.
> - * Any counter that did not overflow can be start in a single step
> - * while the overflowed counters need to be started with updated initialization
> - * value.
> - */
> -static inline void rvpmu_sbi_start_ovf_ctrs_sbi(struct cpu_hw_events *cpu_hw_evt,
> - u64 ctr_ovf_mask)
> +static void rvpmu_deleg_ctr_start_mask(unsigned long mask)
> {
> - int idx = 0, i;
> - struct perf_event *event;
> - unsigned long flag = SBI_PMU_START_FLAG_SET_INIT_VALUE;
> - unsigned long ctr_start_mask = 0;
> - uint64_t max_period;
> - struct hw_perf_event *hwc;
> - u64 init_val = 0;
> + unsigned long scountinhibit_val = 0;
>
> - for (i = 0; i < BITS_TO_LONGS(RISCV_MAX_COUNTERS); i++) {
> - ctr_start_mask = cpu_hw_evt->used_hw_ctrs[i] & ~ctr_ovf_mask;
> - /* Start all the counters that did not overflow in a single shot */
> - sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, i * BITS_PER_LONG, ctr_start_mask,
> - 0, 0, 0, 0);
> - }
> + scountinhibit_val = csr_read(CSR_SCOUNTINHIBIT);
> + scountinhibit_val &= ~mask;
> +
> + csr_write(CSR_SCOUNTINHIBIT, scountinhibit_val);
> +}
> +
> +static void rvpmu_deleg_ctr_enable_irq(struct perf_event *event)
> +{
> + unsigned long hpmevent_curr;
> + unsigned long of_mask;
> + struct hw_perf_event *hwc = &event->hw;
> + int counter_idx = hwc->idx;
> + unsigned long sip_val = csr_read(CSR_SIP);
> +
> + if (!is_sampling_event(event) || (sip_val & SIP_LCOFIP))
> + return;
>
> - /* Reinitialize and start all the counter that overflowed */
> - while (ctr_ovf_mask) {
> - if (ctr_ovf_mask & 0x01) {
> - event = cpu_hw_evt->events[idx];
> - hwc = &event->hw;
> - max_period = riscv_pmu_ctr_get_width_mask(event);
> - init_val = local64_read(&hwc->prev_count) & max_period;
> #if defined(CONFIG_32BIT)
> - sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, idx, 1,
> - flag, init_val, init_val >> 32, 0);
> + hpmevent_curr = csr_ind_read(CSR_SIREG5, SISELECT_SSCCFG_BASE, counter_idx);
> + of_mask = (u32)~HPMEVENTH_OF;
> #else
> - sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, idx, 1,
> - flag, init_val, 0, 0);
> + hpmevent_curr = csr_ind_read(CSR_SIREG2, SISELECT_SSCCFG_BASE, counter_idx);
> + of_mask = ~HPMEVENT_OF;
> +#endif
There are quite a few scattered #if defined(CONFIG_32BIT) blocks in this
patch. Could we centralize the 32-bit/64-bit differences — e.g. behind a
few small helpers or macros (for the SIREG2/SIREG4/SIREG5 selection and
the OF-bit mask) defined in one place — so the main counter logic stays
shared and free of #ifdefs?
> +
> + hpmevent_curr &= of_mask;
> +#if defined(CONFIG_32BIT)
> + csr_ind_write(CSR_SIREG4, SISELECT_SSCCFG_BASE, counter_idx, hpmevent_curr);
> +#else
> + csr_ind_write(CSR_SIREG2, SISELECT_SSCCFG_BASE, counter_idx, hpmevent_curr);
> #endif
> - perf_event_update_userpage(event);
> - }
> - ctr_ovf_mask = ctr_ovf_mask >> 1;
> - idx++;
> - }
> }
>
> -static inline void rvpmu_sbi_start_ovf_ctrs_snapshot(struct cpu_hw_events *cpu_hw_evt,
> - u64 ctr_ovf_mask)
> +static void rvpmu_deleg_ctr_start(struct perf_event *event, u64 ival)
> +{
> + unsigned long scountinhibit_val = 0;
> + struct hw_perf_event *hwc = &event->hw;
> +
> +#if defined(CONFIG_32BIT)
> + csr_ind_write(CSR_SIREG, SISELECT_SSCCFG_BASE, hwc->idx, ival & 0xFFFFFFFF);
> + csr_ind_write(CSR_SIREG4, SISELECT_SSCCFG_BASE, hwc->idx, ival >> BITS_PER_LONG);
> +#else
> + csr_ind_write(CSR_SIREG, SISELECT_SSCCFG_BASE, hwc->idx, ival);
> +#endif
> +
> + rvpmu_deleg_ctr_enable_irq(event);
> +
> + scountinhibit_val = csr_read(CSR_SCOUNTINHIBIT);
> + scountinhibit_val &= ~(1 << hwc->idx);
> +
> + csr_write(CSR_SCOUNTINHIBIT, scountinhibit_val);
> +}
> +
...
>
> +#define RISCV_PMU_DELEG_RAW_EVENT_MASK GENMASK_ULL(55, 0)
> +
> #define HW_OP_UNSUPPORTED 0xFFFF
> #define CACHE_OP_UNSUPPORTED 0xFFFF
>
>
> --
> 2.43.0
>
>
Thanks,
Yunhui
Powered by blists - more mailing lists