lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <9efb75fe-994f-24c7-7872-b5e5041105a6@samsung.com>
Date:   Thu, 18 Jul 2019 19:17:17 +0900
From:   Chanwoo Choi <cw00.choi@...sung.com>
To:     Dmitry Osipenko <digetx@...il.com>,
        Thierry Reding <thierry.reding@...il.com>,
        MyungJoo Ham <myungjoo.ham@...sung.com>,
        Kyungmin Park <kyungmin.park@...sung.com>,
        Jonathan Hunter <jonathanh@...dia.com>,
        Tomeu Vizoso <tomeu.vizoso@...labora.com>
Cc:     linux-pm@...r.kernel.org, linux-tegra@...r.kernel.org,
        linux-kernel@...r.kernel.org
Subject: Re: [PATCH v4 05/24] PM / devfreq: tegra30: Set up watermarks
 properly

On 19. 7. 8. 오전 7:32, Dmitry Osipenko wrote:
> The current implementation is inaccurate and results in very intensive
> interrupt activity, which neglects the whole idea of polling offload to
> hardware. The reason of the shortcoming is that watermarks are not set
> up correctly and this results in ACTMON constantly asking to change freq
> and then these requests are ignored. The end result of this patch is that
> there are few hundreds of ACTMON's interrupts instead of tens thousands
> after few minutes of a working devfreq, meanwhile the transitions activity
> stays about the same and governor becomes more reactive.
> 
> Since watermarks are set precisely correct now, the boosting logic is
> changed a tad to accommodate the change. The "average sustain coefficient"
> multiplier is gone now since there is no need to compensate the improper
> watermarks and EMC frequency-bump happens once boosting hits the upper
> watermark enough times, depending on the per-device boosting threshold.
> 
> Signed-off-by: Dmitry Osipenko <digetx@...il.com>
> ---
>  drivers/devfreq/tegra30-devfreq.c | 293 +++++++++++++++++++++---------
>  1 file changed, 209 insertions(+), 84 deletions(-)
> 
> diff --git a/drivers/devfreq/tegra30-devfreq.c b/drivers/devfreq/tegra30-devfreq.c
> index 4be7858c33bc..16f7e6cf3b99 100644
> --- a/drivers/devfreq/tegra30-devfreq.c
> +++ b/drivers/devfreq/tegra30-devfreq.c
> @@ -47,6 +47,8 @@
>  
>  #define ACTMON_DEV_INTR_CONSECUTIVE_UPPER			BIT(31)
>  #define ACTMON_DEV_INTR_CONSECUTIVE_LOWER			BIT(30)
> +#define ACTMON_DEV_INTR_AVG_BELOW_WMARK				BIT(25)
> +#define ACTMON_DEV_INTR_AVG_ABOVE_WMARK				BIT(24)
>  
>  #define ACTMON_ABOVE_WMARK_WINDOW				1
>  #define ACTMON_BELOW_WMARK_WINDOW				3
> @@ -63,9 +65,8 @@
>   * ACTMON_AVERAGE_WINDOW_LOG2: default value for @DEV_CTRL_K_VAL, which
>   * translates to 2 ^ (K_VAL + 1). ex: 2 ^ (6 + 1) = 128
>   */
> -#define ACTMON_AVERAGE_WINDOW_LOG2			6
> -#define ACTMON_SAMPLING_PERIOD				12 /* ms */
> -#define ACTMON_DEFAULT_AVG_BAND				6  /* 1/10 of % */
> +#define ACTMON_AVERAGE_WINDOW_LOG2				6
> +#define ACTMON_SAMPLING_PERIOD					12 /* ms */
>  
>  #define KHZ							1000
>  
> @@ -142,9 +143,6 @@ struct tegra_devfreq_device {
>  	 * watermark breaches.
>  	 */
>  	unsigned long boost_freq;
> -
> -	/* Optimal frequency calculated from the stats for this device */
> -	unsigned long target_freq;
>  };
>  
>  struct tegra_devfreq {
> @@ -156,7 +154,6 @@ struct tegra_devfreq {
>  
>  	struct clk		*emc_clock;
>  	unsigned long		max_freq;
> -	unsigned long		cur_freq;
>  	struct notifier_block	rate_change_nb;
>  
>  	struct tegra_devfreq_device devices[ARRAY_SIZE(actmon_device_configs)];
> @@ -205,42 +202,182 @@ static unsigned long do_percent(unsigned long val, unsigned int pct)
>  	return val * pct / 100;
>  }
>  
> +static unsigned long actmon_cpu_to_emc_rate(struct tegra_devfreq *tegra)
> +{
> +	struct tegra_actmon_emc_ratio *ratio = actmon_emc_ratios;
> +	unsigned int cpu_freq = cpufreq_get(0);
> +	unsigned int i;
> +
> +	for (i = 0; i < ARRAY_SIZE(actmon_emc_ratios); i++, ratio++) {
> +		if (cpu_freq >= ratio->cpu_freq) {
> +			if (ratio->emc_freq >= tegra->max_freq)
> +				return tegra->max_freq;
> +			else
> +				return ratio->emc_freq;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
> +static unsigned long
> +tegra_actmon_account_cpu_freq(struct tegra_devfreq *tegra,
> +			      struct tegra_devfreq_device *dev,
> +			      unsigned long target_freq)
> +{
> +	unsigned long static_cpu_emc_freq;
> +
> +	if (dev->config->avg_dependency_threshold &&
> +	    dev->config->avg_dependency_threshold < dev->avg_count) {
> +		static_cpu_emc_freq = actmon_cpu_to_emc_rate(tegra);
> +		target_freq = max(target_freq, static_cpu_emc_freq);
> +	}
> +
> +	return target_freq;
> +}
> +
> +static unsigned long tegra_actmon_lower_freq(struct tegra_devfreq *tegra,
> +					     unsigned long target_freq)
> +{
> +	unsigned long lower = target_freq;
> +	struct dev_pm_opp *opp;
> +
> +	opp = dev_pm_opp_find_freq_floor(tegra->devfreq->dev.parent, &lower);
> +	if (IS_ERR(opp))
> +		lower = 0;
> +	else
> +		dev_pm_opp_put(opp);
> +
> +	return lower;
> +}
> +
> +static unsigned long tegra_actmon_upper_freq(struct tegra_devfreq *tegra,
> +					     unsigned long target_freq)
> +{
> +	unsigned long upper = target_freq + 1;
> +	struct dev_pm_opp *opp;
> +
> +	opp = dev_pm_opp_find_freq_ceil(tegra->devfreq->dev.parent, &upper);
> +	if (IS_ERR(opp))
> +		upper = ULONG_MAX;
> +	else
> +		dev_pm_opp_put(opp);
> +
> +	return upper;
> +}
> +
> +static void tegra_actmon_get_lower_upper(struct tegra_devfreq *tegra,
> +					 struct tegra_devfreq_device *dev,
> +					 unsigned long target_freq,
> +					 unsigned long *lower,
> +					 unsigned long *upper)
> +{
> +	/*
> +	 * Memory frequencies are guaranteed to have 1MHz granularity
> +	 * and thus we need this rounding down to get a proper watermarks
> +	 * range in a case where target_freq falls into a range of
> +	 * next_possible_opp_freq - 1MHz.
> +	 */
> +	target_freq = round_down(target_freq, 1000000);
> +
> +	/* watermarks are set at the borders of the corresponding OPPs */
> +	*lower = tegra_actmon_lower_freq(tegra, target_freq);
> +	*upper = tegra_actmon_upper_freq(tegra, target_freq);
> +
> +	*lower /= KHZ;
> +	*upper /= KHZ;
> +
> +	/*
> +	 * The upper watermark should take into account CPU's frequency
> +	 * because cpu_to_emc_rate() may override the target_freq with
> +	 * a higher value and thus upper watermark need to be set up
> +	 * accordingly to avoid parasitic upper-events.
> +	 */
> +	*upper = tegra_actmon_account_cpu_freq(tegra, dev, *upper);
> +
> +	*lower *= ACTMON_SAMPLING_PERIOD;
> +	*upper *= ACTMON_SAMPLING_PERIOD;
> +}
> +
>  static void tegra_devfreq_update_avg_wmark(struct tegra_devfreq *tegra,
>  					   struct tegra_devfreq_device *dev)
>  {
> -	u32 avg = dev->avg_count;
> -	u32 avg_band_freq = tegra->max_freq * ACTMON_DEFAULT_AVG_BAND / KHZ;
> -	u32 band = avg_band_freq * ACTMON_SAMPLING_PERIOD;
> +	unsigned long lower, upper, freq;
>  
> -	device_writel(dev, avg + band, ACTMON_DEV_AVG_UPPER_WMARK);
> +	freq = dev->avg_count / ACTMON_SAMPLING_PERIOD * KHZ;
> +	tegra_actmon_get_lower_upper(tegra, dev, freq, &lower, &upper);
>  
> -	avg = max(dev->avg_count, band);
> -	device_writel(dev, avg - band, ACTMON_DEV_AVG_LOWER_WMARK);
> +	/*
> +	 * We want to get interrupts when MCCPU client crosses the
> +	 * dependency threshold in order to take into / out of account
> +	 * the CPU's freq.
> +	 */
> +	if (lower < dev->config->avg_dependency_threshold &&
> +	    upper > dev->config->avg_dependency_threshold) {
> +		if (dev->avg_count < dev->config->avg_dependency_threshold)
> +			upper = dev->config->avg_dependency_threshold;
> +		else
> +			lower = dev->config->avg_dependency_threshold;
> +	}
> +
> +	device_writel(dev, lower, ACTMON_DEV_AVG_LOWER_WMARK);
> +	device_writel(dev, upper, ACTMON_DEV_AVG_UPPER_WMARK);
>  }
>  
>  static void tegra_devfreq_update_wmark(struct tegra_devfreq *tegra,
> -				       struct tegra_devfreq_device *dev)
> +				       struct tegra_devfreq_device *dev,
> +				       unsigned long freq)
>  {
> -	u32 val = tegra->cur_freq * ACTMON_SAMPLING_PERIOD;
> +	unsigned long lower, upper, delta;
> +
> +	/*
> +	 * Boosting logic kicks-in once lower / upper watermark is hit.
> +	 * The watermarks are based on the updated EMC rate and the
> +	 * average activity.
> +	 *
> +	 * The higher watermark is set in accordance to the EMC rate
> +	 * because we want to set it to the highest mark here and EMC rate
> +	 * represents that mark. The consecutive-upper interrupts are
> +	 * always enabled and we don't want to receive them if they won't
> +	 * do anything useful, hence the upper watermark is capped to maximum.
> +	 * Note that the EMC rate is changed once boosting pushed the rate
> +	 * too high, in that case boosting-up will be stopped because
> +	 * upper watermark is much higher now and it is *important* to
> +	 * stop the unwanted interrupts.
> +	 */
> +	tegra_actmon_get_lower_upper(tegra, dev, freq - 1, &lower, &upper);
> +
> +	delta = do_percent(upper - lower, dev->config->boost_up_threshold);
> +	device_writel(dev, lower + delta, ACTMON_DEV_UPPER_WMARK);
>  
> -	device_writel(dev, do_percent(val, dev->config->boost_up_threshold),
> -		      ACTMON_DEV_UPPER_WMARK);
> +	/*
> +	 * Meanwhile the lower mark is based on the average value
> +	 * because it is the lowest possible consecutive-mark for this
> +	 * device. Once that mark is hit and boosting is stopped, the
> +	 * interrupt is disabled by ISR.
> +	 */
> +	freq = dev->avg_count / ACTMON_SAMPLING_PERIOD * KHZ;
> +	tegra_actmon_get_lower_upper(tegra, dev, freq, &lower, &upper);
>  
> -	device_writel(dev, do_percent(val, dev->config->boost_down_threshold),
> -		      ACTMON_DEV_LOWER_WMARK);
> +	delta = do_percent(upper - lower, dev->config->boost_down_threshold);
> +	device_writel(dev, lower + delta, ACTMON_DEV_LOWER_WMARK);
>  }
>  
>  static void actmon_isr_device(struct tegra_devfreq *tegra,
>  			      struct tegra_devfreq_device *dev)
>  {
> -	u32 intr_status, dev_ctrl;
> +	u32 intr_status, dev_ctrl, avg_intr_mask;
>  
>  	dev->avg_count = device_readl(dev, ACTMON_DEV_AVG_COUNT);
> -	tegra_devfreq_update_avg_wmark(tegra, dev);
> -
>  	intr_status = device_readl(dev, ACTMON_DEV_INTR_STATUS);
>  	dev_ctrl = device_readl(dev, ACTMON_DEV_CTRL);
>  
> +	avg_intr_mask = ACTMON_DEV_INTR_AVG_BELOW_WMARK |
> +			ACTMON_DEV_INTR_AVG_ABOVE_WMARK;
> +
> +	if (intr_status & avg_intr_mask)
> +		tegra_devfreq_update_avg_wmark(tegra, dev);
> +
>  	if (intr_status & ACTMON_DEV_INTR_CONSECUTIVE_UPPER) {
>  		/*
>  		 * new_boost = min(old_boost * up_coef + step, max_freq)
> @@ -253,8 +390,6 @@ static void actmon_isr_device(struct tegra_devfreq *tegra,
>  
>  		if (dev->boost_freq >= tegra->max_freq)
>  			dev->boost_freq = tegra->max_freq;
> -		else
> -			dev_ctrl |= ACTMON_DEV_CTRL_CONSECUTIVE_ABOVE_WMARK_EN;
>  	} else if (intr_status & ACTMON_DEV_INTR_CONSECUTIVE_LOWER) {
>  		/*
>  		 * new_boost = old_boost * down_coef
> @@ -263,63 +398,37 @@ static void actmon_isr_device(struct tegra_devfreq *tegra,
>  		dev->boost_freq = do_percent(dev->boost_freq,
>  					     dev->config->boost_down_coeff);
>  
> -		dev_ctrl |= ACTMON_DEV_CTRL_CONSECUTIVE_ABOVE_WMARK_EN;
> -
>  		if (dev->boost_freq < (ACTMON_BOOST_FREQ_STEP >> 1))
>  			dev->boost_freq = 0;
> -		else
> -			dev_ctrl |= ACTMON_DEV_CTRL_CONSECUTIVE_BELOW_WMARK_EN;
>  	}
>  
> -	if (dev->config->avg_dependency_threshold) {
> -		if (dev->avg_count >= dev->config->avg_dependency_threshold)
> -			dev_ctrl |= ACTMON_DEV_CTRL_CONSECUTIVE_BELOW_WMARK_EN;
> -		else if (dev->boost_freq == 0)
> -			dev_ctrl &= ~ACTMON_DEV_CTRL_CONSECUTIVE_BELOW_WMARK_EN;
> +	if (intr_status & avg_intr_mask) {
> +		/*
> +		 * Once average watermark is hit, it means that the memory
> +		 * activity changed significantly and thus boosting-up shall
> +		 * be reset because EMC clock rate will be changed and
> +		 * boosting will restart in this case.
> +		 */
> +		dev->boost_freq = 0;
>  	}
>  
> -	device_writel(dev, dev_ctrl, ACTMON_DEV_CTRL);
> +	/* no boosting => no need for consecutive-down interrupt */
> +	if (dev->boost_freq == 0)
> +		dev_ctrl &= ~ACTMON_DEV_CTRL_CONSECUTIVE_BELOW_WMARK_EN;
>  
> +	device_writel(dev, dev_ctrl, ACTMON_DEV_CTRL);
>  	device_writel(dev, ACTMON_INTR_STATUS_CLEAR, ACTMON_DEV_INTR_STATUS);
>  }
>  
> -static unsigned long actmon_cpu_to_emc_rate(struct tegra_devfreq *tegra,
> -					    unsigned long cpu_freq)
> -{
> -	unsigned int i;
> -	struct tegra_actmon_emc_ratio *ratio = actmon_emc_ratios;
> -
> -	for (i = 0; i < ARRAY_SIZE(actmon_emc_ratios); i++, ratio++) {
> -		if (cpu_freq >= ratio->cpu_freq) {
> -			if (ratio->emc_freq >= tegra->max_freq)
> -				return tegra->max_freq;
> -			else
> -				return ratio->emc_freq;
> -		}
> -	}
> -
> -	return 0;
> -}
> -
> -static void actmon_update_target(struct tegra_devfreq *tegra,
> -				 struct tegra_devfreq_device *dev)
> +static unsigned long actmon_update_target(struct tegra_devfreq *tegra,
> +					  struct tegra_devfreq_device *dev)
>  {
> -	unsigned long cpu_freq = 0;
> -	unsigned long static_cpu_emc_freq = 0;
> -	unsigned int avg_sustain_coef;
> -
> -	if (dev->config->avg_dependency_threshold) {
> -		cpu_freq = cpufreq_get(0);
> -		static_cpu_emc_freq = actmon_cpu_to_emc_rate(tegra, cpu_freq);
> -	}
> +	unsigned long target_freq;
>  
> -	dev->target_freq = dev->avg_count / ACTMON_SAMPLING_PERIOD;
> -	avg_sustain_coef = 100 * 100 / dev->config->boost_up_threshold;
> -	dev->target_freq = do_percent(dev->target_freq, avg_sustain_coef);
> -	dev->target_freq += dev->boost_freq;
> +	target_freq = dev->avg_count / ACTMON_SAMPLING_PERIOD + dev->boost_freq;
> +	target_freq = tegra_actmon_account_cpu_freq(tegra, dev, target_freq);
>  
> -	if (dev->avg_count >= dev->config->avg_dependency_threshold)
> -		dev->target_freq = max(dev->target_freq, static_cpu_emc_freq);
> +	return target_freq;
>  }
>  
>  static irqreturn_t actmon_thread_isr(int irq, void *data)
> @@ -351,8 +460,8 @@ static int tegra_actmon_rate_notify_cb(struct notifier_block *nb,
>  				       unsigned long action, void *ptr)
>  {
>  	struct clk_notifier_data *data = ptr;
> -	struct tegra_devfreq *tegra;
>  	struct tegra_devfreq_device *dev;
> +	struct tegra_devfreq *tegra;
>  	unsigned int i;
>  
>  	if (action != POST_RATE_CHANGE)
> @@ -360,12 +469,28 @@ static int tegra_actmon_rate_notify_cb(struct notifier_block *nb,
>  
>  	tegra = container_of(nb, struct tegra_devfreq, rate_change_nb);
>  
> -	tegra->cur_freq = data->new_rate / KHZ;
> -
> +	/*
> +	 * EMC rate could change due to three reasons:
> +	 *
> +	 *    1. Average watermark hit
> +	 *    2. Boosting overflow
> +	 *    3. CPU freq change
> +	 *
> +	 * Once rate is changed, the consecutive watermarks need to be
> +	 * updated in order for boosting to work properly and to avoid
> +	 * unnecessary interrupts. Note that the consecutive range is set for
> +	 * all of devices using the same rate, hence if CPU is doing much
> +	 * less than the other memory clients, then its upper watermark will
> +	 * be very high in comparison to the actual activity (lower watermark)
> +	 * and thus unnecessary upper-interrupts will be suppressed.
> +	 *
> +	 * The average watermarks also should be updated because of 3.
> +	 */
>  	for (i = 0; i < ARRAY_SIZE(tegra->devices); i++) {
>  		dev = &tegra->devices[i];
>  
> -		tegra_devfreq_update_wmark(tegra, dev);
> +		tegra_devfreq_update_avg_wmark(tegra, dev);
> +		tegra_devfreq_update_wmark(tegra, dev, data->new_rate);
>  	}
>  
>  	return NOTIFY_OK;
> @@ -374,15 +499,14 @@ static int tegra_actmon_rate_notify_cb(struct notifier_block *nb,
>  static void tegra_actmon_configure_device(struct tegra_devfreq *tegra,
>  					  struct tegra_devfreq_device *dev)
>  {
> -	u32 val = 0;
> -
> -	dev->target_freq = tegra->cur_freq;
> +	u32 val = 0, target_freq;
>  
> -	dev->avg_count = tegra->cur_freq * ACTMON_SAMPLING_PERIOD;
> +	target_freq = clk_get_rate(tegra->emc_clock) / KHZ;
> +	dev->avg_count = target_freq * ACTMON_SAMPLING_PERIOD;
>  	device_writel(dev, dev->avg_count, ACTMON_DEV_INIT_AVG);
>  
>  	tegra_devfreq_update_avg_wmark(tegra, dev);
> -	tegra_devfreq_update_wmark(tegra, dev);
> +	tegra_devfreq_update_wmark(tegra, dev, target_freq);
>  
>  	device_writel(dev, ACTMON_COUNT_WEIGHT, ACTMON_DEV_COUNT_WEIGHT);
>  	device_writel(dev, ACTMON_INTR_STATUS_CLEAR, ACTMON_DEV_INTR_STATUS);
> @@ -469,13 +593,13 @@ static int tegra_devfreq_get_dev_status(struct device *dev,
>  	struct tegra_devfreq_device *actmon_dev;
>  	unsigned long cur_freq;
>  
> -	cur_freq = READ_ONCE(tegra->cur_freq);
> +	cur_freq = clk_get_rate(tegra->emc_clock);
>  
>  	/* To be used by the tegra governor */
>  	stat->private_data = tegra;
>  
>  	/* The below are to be used by the other governors */
> -	stat->current_frequency = cur_freq * KHZ;
> +	stat->current_frequency = cur_freq;
>  
>  	actmon_dev = &tegra->devices[MCALL];
>  
> @@ -486,7 +610,7 @@ static int tegra_devfreq_get_dev_status(struct device *dev,
>  	stat->busy_time *= 100 / BUS_SATURATION_RATIO;
>  
>  	/* Number of cycles in a sampling period */
> -	stat->total_time = ACTMON_SAMPLING_PERIOD * cur_freq;
> +	stat->total_time = cur_freq / KHZ * ACTMON_SAMPLING_PERIOD;
>  
>  	stat->busy_time = min(stat->busy_time, stat->total_time);
>  
> @@ -505,6 +629,7 @@ static int tegra_governor_get_target(struct devfreq *devfreq,
>  	struct devfreq_dev_status *stat;
>  	struct tegra_devfreq *tegra;
>  	struct tegra_devfreq_device *dev;
> +	unsigned long dev_target_freq;
>  	unsigned long target_freq = 0;
>  	unsigned int i;
>  	int err;
> @@ -520,9 +645,9 @@ static int tegra_governor_get_target(struct devfreq *devfreq,
>  	for (i = 0; i < ARRAY_SIZE(tegra->devices); i++) {
>  		dev = &tegra->devices[i];
>  
> -		actmon_update_target(tegra, dev);
> +		dev_target_freq = actmon_update_target(tegra, dev);
>  
> -		target_freq = max(target_freq, dev->target_freq);
> +		target_freq = max(target_freq, dev_target_freq);
>  	}
>  
>  	*freq = target_freq * KHZ;
> @@ -642,7 +767,6 @@ static int tegra_devfreq_probe(struct platform_device *pdev)
>  		return rate;
>  	}
>  
> -	tegra->cur_freq = clk_get_rate(tegra->emc_clock) / KHZ;
>  	tegra->max_freq = rate / KHZ;
>  
>  	for (i = 0; i < ARRAY_SIZE(actmon_device_configs); i++) {
> @@ -671,7 +795,8 @@ static int tegra_devfreq_probe(struct platform_device *pdev)
>  	platform_set_drvdata(pdev, tegra);
>  
>  	tegra->rate_change_nb.notifier_call = tegra_actmon_rate_notify_cb;
> -	err = clk_notifier_register(tegra->emc_clock, &tegra->rate_change_nb);
> +	err = clk_notifier_register(tegra->emc_clock,
> +				    &tegra->rate_change_nb);
>  	if (err) {
>  		dev_err(&pdev->dev,
>  			"Failed to register rate change notifier\n");
> 


Maybe, it is possible to merge patch4/patch19/patch20 to one patch.


-- 
Best Regards,
Chanwoo Choi
Samsung Electronics

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ