linux-kernel - Re: [PATCH v3 1/2] perf/marvell: Odyssey DDR Performance monitor support

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20240129120436.00000f18@Huawei.com>
Date: Mon, 29 Jan 2024 12:04:36 +0000
From: Jonathan Cameron <Jonathan.Cameron@...wei.com>
To: Gowthami Thiagarajan <gthiagarajan@...vell.com>
CC: <will@...nel.org>, <mark.rutland@....com>,
	<linux-arm-kernel@...ts.infradead.org>, <linux-kernel@...r.kernel.org>,
	<sgoutham@...vell.com>, <bbhushan2@...vell.com>, <george.cherian@...vell.com>
Subject: Re: [PATCH v3 1/2] perf/marvell: Odyssey DDR Performance monitor
 support

On Mon, 22 Jan 2024 18:19:32 +0530
Gowthami Thiagarajan <gthiagarajan@...vell.com> wrote:

> Odyssey DRAM Subsystem supports eight counters for monitoring performance
> and software can program those counters to monitor any of the defined
> performance events. Supported performance events include those counted
> at the interface between the DDR controller and the PHY, interface between
> the DDR Controller and the CHI interconnect, or within the DDR Controller.
> 
> Additionally DSS also supports two fixed performance event counters, one
> for ddr reads and the other for ddr writes.
> 
> Signed-off-by: Gowthami Thiagarajan <gthiagarajan@...vell.com>
Hi Gowthami,

A quick drive-by review.
Questions like whether this patch should be split are down to the perf maintainers.
I would ask for it, but not my area of the kernel ;)

Jonathan


> +#define VERSION_V1				1
> +#define VERSION_V2				2

Prefix these defines + perhaps make them an enum?

> +
>  struct cn10k_ddr_pmu {
>  	struct pmu pmu;
>  	void __iomem *base;
> +	struct ddr_pmu_platform_data *p_data;

Make this pointer const, both because it should be and to avoid casting away the const.


> +	int version;
>  	unsigned int cpu;
>  	struct	device *dev;
>  	int active_events;
> @@ -134,6 +160,54 @@ struct cn10k_ddr_pmu {
>  
>  #define to_cn10k_ddr_pmu(p)	container_of(p, struct cn10k_ddr_pmu, pmu)
>  
> +struct ddr_pmu_platform_data {
> +	u64 counter_overflow_val;
> +	u64 counter_max_val;
> +	u64 ddrc_perf_cnt_base;
> +	u64 ddrc_perf_cfg_base;
> +	u64 ddrc_perf_cnt_op_mode_ctrl;

Good to name these in a fashion that makes it clear what they are.
Some are values, some are register address offsets I think?

Why is the ddrc_perf prefix useful in here?


> +	u64 ddrc_perf_cnt_start_op_ctrl;
> +	u64 ddrc_perf_cnt_end_op_ctrl;
> +	u64 ddrc_perf_cnt_end_status;
> +	u64 ddrc_perf_cnt_freerun_en;
> +	u64 ddrc_perf_cnt_freerun_ctrl;
> +	u64 ddrc_perf_cnt_freerun_clr;
> +	u64 ddrc_perf_cnt_value_wr_op;
> +	u64 ddrc_perf_cnt_value_rd_op;
> +};
> +
> +static const struct ddr_pmu_platform_data cn10k_ddr_pmu_pdata = {
> +	.counter_overflow_val =  BIT_ULL(48),
> +	.counter_max_val = GENMASK_ULL(48, 0),
> +	.ddrc_perf_cnt_base = CN10K_DDRC_PERF_CNT_VALUE_BASE,
> +	.ddrc_perf_cfg_base = CN10K_DDRC_PERF_CFG_BASE,
> +	.ddrc_perf_cnt_op_mode_ctrl = CN10K_DDRC_PERF_CNT_OP_MODE_CTRL,
> +	.ddrc_perf_cnt_start_op_ctrl = CN10K_DDRC_PERF_CNT_START_OP_CTRL,
> +	.ddrc_perf_cnt_end_op_ctrl = CN10K_DDRC_PERF_CNT_END_OP_CTRL,
> +	.ddrc_perf_cnt_end_status = CN10K_DDRC_PERF_CNT_END_STATUS,
> +	.ddrc_perf_cnt_freerun_en = CN10K_DDRC_PERF_CNT_FREERUN_EN,
> +	.ddrc_perf_cnt_freerun_ctrl = CN10K_DDRC_PERF_CNT_FREERUN_CTRL,
> +	.ddrc_perf_cnt_freerun_clr = 0,
> +	.ddrc_perf_cnt_value_wr_op = CN10K_DDRC_PERF_CNT_VALUE_WR_OP,
> +	.ddrc_perf_cnt_value_rd_op = CN10K_DDRC_PERF_CNT_VALUE_RD_OP,
> +};
> +
> +static const struct ddr_pmu_platform_data odyssey_ddr_pmu_pdata = {
> +	.counter_overflow_val = 0,
> +	.counter_max_val = GENMASK_ULL(63, 0),
> +	.ddrc_perf_cnt_base = ODY_DDRC_PERF_CNT_VALUE_BASE,
> +	.ddrc_perf_cfg_base = ODY_DDRC_PERF_CFG_BASE,
> +	.ddrc_perf_cnt_op_mode_ctrl = ODY_DDRC_PERF_CNT_OP_MODE_CTRL,
> +	.ddrc_perf_cnt_start_op_ctrl = ODY_DDRC_PERF_CNT_START_OP_CTRL,
> +	.ddrc_perf_cnt_end_op_ctrl = ODY_DDRC_PERF_CNT_END_OP_CTRL,
> +	.ddrc_perf_cnt_end_status = ODY_DDRC_PERF_CNT_END_STATUS,
> +	.ddrc_perf_cnt_freerun_en = 0,
> +	.ddrc_perf_cnt_freerun_ctrl = ODY_DDRC_PERF_CNT_FREERUN_CTRL,
> +	.ddrc_perf_cnt_freerun_clr = ODY_DDRC_PERF_CNT_FREERUN_CLR,
> +	.ddrc_perf_cnt_value_wr_op = ODY_DDRC_PERF_CNT_VALUE_WR_OP,
> +	.ddrc_perf_cnt_value_rd_op = ODY_DDRC_PERF_CNT_VALUE_RD_OP,
> +};


..

> -static int ddr_perf_get_event_bitmap(int eventid, u64 *event_bitmap)
> +static int ddr_perf_get_event_bitmap(int eventid, u64 *event_bitmap,
> +				     struct cn10k_ddr_pmu *ddr_pmu)
>  {
> +	int ret = 0;
> +
>  	switch (eventid) {
>  	case EVENT_HIF_RD_OR_WR ... EVENT_WAW_HAZARD:
>  	case EVENT_OP_IS_REFRESH ... EVENT_OP_IS_ZQLATCH:
>  		*event_bitmap = (1ULL << (eventid - 1));
>  		break;
> +	case EVENT_DFI_PARITY_POISON ...EVENT_DFI_CMD_IS_RETRY:
> +		if (ddr_pmu->version == VERSION_V2) {
> +			*event_bitmap = (1ULL << (eventid - 1));
> +		} else {
> +			pr_err("%s Invalid eventid %d\n", __func__, eventid);
> +			ret = -EINVAL;
> +		}
> +		break;
>  	case EVENT_OP_IS_ENTER_SELFREF:
>  	case EVENT_OP_IS_ENTER_POWERDOWN:
>  	case EVENT_OP_IS_ENTER_MPSM:
> @@ -280,10 +451,10 @@ static int ddr_perf_get_event_bitmap(int eventid, u64 *event_bitmap)
>  		break;
>  	default:
>  		pr_err("%s Invalid eventid %d\n", __func__, eventid);
> -		return -EINVAL;
> +		ret = -EINVAL;
>  	}
>  
> -	return 0;
> +	return ret;
Why?  Just return in the various paths above. 

Direct returns make for easier to review code as you can follow a particular path through
more quickly.

>  }

>  static void cn10k_ddr_perf_pmu_disable(struct pmu *pmu)
>  {
>  	struct cn10k_ddr_pmu *ddr_pmu = to_cn10k_ddr_pmu(pmu);
> +	struct ddr_pmu_platform_data *p_data = ddr_pmu->p_data;
>  
>  	writeq_relaxed(END_OP_CTRL_VAL_END, ddr_pmu->base +
> -		       DDRC_PERF_CNT_END_OP_CTRL);
> +		       p_data->ddrc_perf_cnt_end_op_ctrl);
>  }
>  
>  static void cn10k_ddr_perf_event_update_all(struct cn10k_ddr_pmu *pmu)
> @@ -549,6 +778,7 @@ static void cn10k_ddr_perf_event_update_all(struct cn10k_ddr_pmu *pmu)
>  
>  static irqreturn_t cn10k_ddr_pmu_overflow_handler(struct cn10k_ddr_pmu *pmu)
>  {
> +	struct ddr_pmu_platform_data *p_data = pmu->p_data;
>  	struct perf_event *event;
>  	struct hw_perf_event *hwc;
>  	u64 prev_count, new_count;
> @@ -561,7 +791,8 @@ static irqreturn_t cn10k_ddr_pmu_overflow_handler(struct cn10k_ddr_pmu *pmu)
>  		prev_count = local64_read(&hwc->prev_count);
>  		new_count = cn10k_ddr_perf_read_counter(pmu, hwc->idx);
>  
> -		/* Overflow condition is when new count less than
> +		/*
> +		 * Overflow condition is when new count less than
>  		 * previous count
>  		 */
>  		if (new_count < prev_count)
> @@ -574,7 +805,8 @@ static irqreturn_t cn10k_ddr_pmu_overflow_handler(struct cn10k_ddr_pmu *pmu)
>  		prev_count = local64_read(&hwc->prev_count);
>  		new_count = cn10k_ddr_perf_read_counter(pmu, hwc->idx);
>  
> -		/* Overflow condition is when new count less than
> +		/*
> +		 * Overflow condition is when new count less than

Good to fix this, but not in a patch doing anything meaningful. If you want
to make comment syntax changes - separate patch.

>  		 * previous count
>  		 */
>  		if (new_count < prev_count)
> @@ -586,11 +818,23 @@ static irqreturn_t cn10k_ddr_pmu_overflow_handler(struct cn10k_ddr_pmu *pmu)
>  			continue;
>  
>  		value = cn10k_ddr_perf_read_counter(pmu, i);
> -		if (value == DDRC_PERF_CNT_MAX_VALUE) {
> +		if (value == p_data->counter_max_val) {
>  			pr_info("Counter-(%d) reached max value\n", i);
> -			cn10k_ddr_perf_event_update_all(pmu);
> -			cn10k_ddr_perf_pmu_disable(&pmu->pmu);
> -			cn10k_ddr_perf_pmu_enable(&pmu->pmu);
> +			/*
> +			 * As separate control register is added for each
> +			 * counter in odyssey, no need to update all
> +			 * the events
> +			 */
> +			if (pmu->version == VERSION_V2) {

This sort of version difference is often better handled via a callback
in your pdata structure.  Makes it easy to add a new one for v3 :)

> +				cn10k_ddr_perf_event_update(pmu->events[i]);
> +				cn10k_ddr_perf_counter_stop(pmu, i);
> +				cn10k_ddr_perf_counter_start(pmu, i);
> +
> +			} else {
> +				cn10k_ddr_perf_event_update_all(pmu);
> +				cn10k_ddr_perf_pmu_disable(&pmu->pmu);
> +				cn10k_ddr_perf_pmu_enable(&pmu->pmu);
> +			}
>  		}
>  	}
>  
> @@ -631,7 +875,10 @@ static int cn10k_ddr_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
>  
>  static int cn10k_ddr_perf_probe(struct platform_device *pdev)
>  {
> +	struct ddr_pmu_platform_data *pltfm_data;
> +	struct device *dev = &pdev->dev;
>  	struct cn10k_ddr_pmu *ddr_pmu;
> +	const char  *compatible;
>  	struct resource *res;
>  	void __iomem *base;
>  	char *name;
> @@ -642,6 +889,14 @@ static int cn10k_ddr_perf_probe(struct platform_device *pdev)
>  		return -ENOMEM;
>  
>  	ddr_pmu->dev = &pdev->dev;
> +
> +	pltfm_data = (struct ddr_pmu_platform_data *)
> +		      device_get_match_data(&pdev->dev);

Shouldn't need the cast as it's a const void *
and you should not need to modify it in here (so make your
data types
const struct ddr_pmu_platform_data *).



> +	if (!pltfm_data) {
> +		dev_err(&pdev->dev, "Error: No device match data found\n");
> +		return -ENODEV;
> +	}
> +	ddr_pmu->p_data = pltfm_data;
>  	platform_set_drvdata(pdev, ddr_pmu);
>  
>  	base = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
> @@ -650,25 +905,59 @@ static int cn10k_ddr_perf_probe(struct platform_device *pdev)
>  
>  	ddr_pmu->base = base;
>  
> -	/* Setup the PMU counter to work in manual mode */
> -	writeq_relaxed(OP_MODE_CTRL_VAL_MANNUAL, ddr_pmu->base +
> -		       DDRC_PERF_CNT_OP_MODE_CTRL);
> -
> -	ddr_pmu->pmu = (struct pmu) {
> -		.module	      = THIS_MODULE,
> -		.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
> -		.task_ctx_nr = perf_invalid_context,
> -		.attr_groups = cn10k_attr_groups,
> -		.event_init  = cn10k_ddr_perf_event_init,
> -		.add	     = cn10k_ddr_perf_event_add,
> -		.del	     = cn10k_ddr_perf_event_del,
> -		.start	     = cn10k_ddr_perf_event_start,
> -		.stop	     = cn10k_ddr_perf_event_stop,
> -		.read	     = cn10k_ddr_perf_event_update,
> -		.pmu_enable  = cn10k_ddr_perf_pmu_enable,
> -		.pmu_disable = cn10k_ddr_perf_pmu_disable,
> -	};
> +	ret = device_property_read_string(dev, "compatible", &compatible);
> +	if (ret) {
> +		pr_err("compatible property not found\n");
> +		return ret;
> +	}
>  
> +	if ((strncmp("marvell,cn10k-ddr-pmu", compatible,
> +		     strlen(compatible)) == 0))

Why not just embed this in your pdata structure?
Even better would be add data to reflect the actual differences
rather than relying on a 'version' number.  It tends to be more
extensible as new implementations surface to encode each difference
as data in such a structure.  Otherwise, in the long run you
end up with big switch statements for the many different versions
which just provide some per version constants.  That's messy.


> +		ddr_pmu->version = VERSION_V1;
> +	else
> +		ddr_pmu->version = VERSION_V2;
> +
> +	if (ddr_pmu->version == VERSION_V1) {
> +		ddr_pmu->pmu = (struct pmu) {
> +			.module	      = THIS_MODULE,
> +			.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
> +			.task_ctx_nr = perf_invalid_context,
> +			.attr_groups = cn10k_attr_groups,
> +			.event_init  = cn10k_ddr_perf_event_init,
> +			.add	     = cn10k_ddr_perf_event_add,
> +			.del	     = cn10k_ddr_perf_event_del,
> +			.start	     = cn10k_ddr_perf_event_start,
> +			.stop	     = cn10k_ddr_perf_event_stop,
> +			.read	     = cn10k_ddr_perf_event_update,
> +			.pmu_enable  = cn10k_ddr_perf_pmu_enable,
> +			.pmu_disable = cn10k_ddr_perf_pmu_disable,
> +		};
> +
> +	/*
> +	 * As we have separate control registers for each counter in Odyssey,
> +	 * setting up the mode will be done when we enable each counter
> +	 *

Trivial: Odd formatting. I'd drop the blank commented line and add a full stop.

> +	 */
> +
> +	/* Setup the PMU counter to work in manual mode */
> +		writeq(OP_MODE_CTRL_VAL_MANUAL, ddr_pmu->base +
> +		      (ddr_pmu->p_data->ddrc_perf_cnt_op_mode_ctrl));
> +	} else {
> +		ddr_pmu->pmu = (struct pmu) {
> +			.module       = THIS_MODULE,
> +			.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
> +			.task_ctx_nr = perf_invalid_context,
> +			.attr_groups = odyssey_attr_groups,
> +			.event_init  = cn10k_ddr_perf_event_init,
> +			.add         = cn10k_ddr_perf_event_add,
> +			.del         = cn10k_ddr_perf_event_del,
> +			.start       = cn10k_ddr_perf_event_start,
> +			.stop        = cn10k_ddr_perf_event_stop,
> +			.read        = cn10k_ddr_perf_event_update,
> +			.pmu_enable  = NULL,
> +			.pmu_disable = NULL,

No need to set these to NULL.  Not providing them has the same result and
I don't think there is any particular value with respect to 'documentation' in
setting them explicitly.  If there is a reason this needs calling out
I'd expect a comment explaining why.

The ideal patch series structure for changes like the ones this patch makes would be:
1) Refactor to pull out the pdata - no functional change.
2) Patch adding the support for the new device.

Result is easier to review than the combination of the two changes.


> +		};
> +	}