linux-kernel - Re: [PATCH] media: rc: pwm-ir-tx: use hrtimer for edge generation

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <ZSklLLWaxpS98Agc@gofer.mess.org>
Date:   Fri, 13 Oct 2023 12:08:28 +0100
From:   Sean Young <sean@...s.org>
To:     Ivaylo Dimitrov <ivo.g.dimitrov.75@...il.com>
Cc:     mchehab@...nel.org, linux-media@...r.kernel.org,
        linux-kernel@...r.kernel.org, linux-rt-users@...r.kernel.org
Subject: Re: [PATCH] media: rc: pwm-ir-tx: use hrtimer for edge generation

On Thu, Oct 05, 2023 at 01:28:59PM +0300, Ivaylo Dimitrov wrote:
> usleep_range() seems to suffer from scheduling latency of up to 400 us on
> some platforms (like OMAP) which makes it unusable for IR pulses edge
> timings. In the same time pwm_ir_trx() is called in a context with priority
> which might not be suitable for real-time IR pulses generation.
> 
> Fix that by using hrtimer and a thread with sched_set_fifo() priority. That
> way scheduling latency is compensated by the fact that PWM is controlled in
> the thread after a completion is signalled in hrtimer function - we have
> more or less the same latency for every edge. If completion comes earlier
> than needed, we do udelay() till the exact time for the next edge. That
> way pulse width generation is robust and precise and mostly independent of
> the system load.
> 
> Tests on Nokia N900 showed that udelay() is called with up to 200 us in
> worst cases, usually times are less that 100 us.

Cc the RT folks, I would like some input on this. How bad is this for
linux-rt or non-linux-rt for latency? Is this a problem, and if so is there
an #ifdef we can use to avoid it in certain builds?

> Signed-off-by: Ivaylo Dimitrov <ivo.g.dimitrov.75@...il.com>
> ---
>  drivers/media/rc/pwm-ir-tx.c | 122 ++++++++++++++++++++++++++++++++++++++++---
>  1 file changed, 115 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/media/rc/pwm-ir-tx.c b/drivers/media/rc/pwm-ir-tx.c
> index 7732054c..cb6ce73 100644
> --- a/drivers/media/rc/pwm-ir-tx.c
> +++ b/drivers/media/rc/pwm-ir-tx.c
> @@ -4,6 +4,7 @@
>   */
>  
>  #include <linux/kernel.h>
> +#include <linux/kthread.h>
>  #include <linux/module.h>
>  #include <linux/pwm.h>
>  #include <linux/delay.h>
> @@ -17,8 +18,16 @@
>  
>  struct pwm_ir {
>  	struct pwm_device *pwm;
> +	struct hrtimer timer;
> +	struct task_struct *tx_thread;
> +	wait_queue_head_t tx_wq;
> +	struct completion tx_done;
> +	struct completion edge;
>  	unsigned int carrier;
>  	unsigned int duty_cycle;
> +	unsigned int *txbuf;
> +	unsigned int count;
> +	unsigned int index;
>  };
>  
>  static const struct of_device_id pwm_ir_of_match[] = {
> @@ -48,12 +57,41 @@ static int pwm_ir_set_carrier(struct rc_dev *dev, u32 carrier)
>  	return 0;
>  }
>  
> -static int pwm_ir_tx(struct rc_dev *dev, unsigned int *txbuf,
> -		     unsigned int count)
> +static enum hrtimer_restart pwm_ir_timer_cb(struct hrtimer *timer)
> +{
> +	struct pwm_ir *pwm_ir = container_of(timer, struct pwm_ir, timer);
> +	ktime_t now;
> +
> +	/*
> +	 * If we happen to hit an odd latency spike, loop through the
> +	 * pulses until we catch up.
> +	 */
> +	do {
> +		u64 edge;
> +
> +		complete(&pwm_ir->edge);
> +
> +		if (pwm_ir->index >= pwm_ir->count)
> +			return HRTIMER_NORESTART;
> +
> +		edge = US_TO_NS(pwm_ir->txbuf[pwm_ir->index]);
> +		hrtimer_add_expires_ns(timer, edge);
> +
> +		pwm_ir->index++;
> +
> +		now = timer->base->get_time();
> +
> +	} while (hrtimer_get_expires_tv64(timer) < now);
> +
> +	return HRTIMER_RESTART;
> +}
> +
> +static void _pwm_ir_tx(struct pwm_ir *pwm_ir)
>  {
> -	struct pwm_ir *pwm_ir = dev->priv;
>  	struct pwm_device *pwm = pwm_ir->pwm;
>  	struct pwm_state state;
> +	unsigned int *txbuf = pwm_ir->txbuf;
> +	unsigned int count = pwm_ir->count;
>  	int i;
>  	ktime_t edge;
>  	long delta;
> @@ -63,6 +101,8 @@ static int pwm_ir_tx(struct rc_dev *dev, unsigned int *txbuf,
>  	state.period = DIV_ROUND_CLOSEST(NSEC_PER_SEC, pwm_ir->carrier);
>  	pwm_set_relative_duty_cycle(&state, pwm_ir->duty_cycle, 100);
>  
> +	hrtimer_start(&pwm_ir->timer, 0, HRTIMER_MODE_REL);
> +	wait_for_completion(&pwm_ir->edge);
>  	edge = ktime_get();
>  
>  	for (i = 0; i < count; i++) {
> @@ -70,14 +110,50 @@ static int pwm_ir_tx(struct rc_dev *dev, unsigned int *txbuf,
>  		pwm_apply_state(pwm, &state);
>  
>  		edge = ktime_add_us(edge, txbuf[i]);
> +		wait_for_completion(&pwm_ir->edge);
> +
>  		delta = ktime_us_delta(edge, ktime_get());
> +
>  		if (delta > 0)
> -			usleep_range(delta, delta + 10);
> +			udelay(delta);

So you're sleeping if the timer arrives early, but what about if it arrives
late? No amount of sleeping will here there.

>  	}
>  
>  	state.enabled = false;
>  	pwm_apply_state(pwm, &state);
>  
> +	pwm_ir->count = 0;
> +}
> +
> +static int pwm_ir_thread(void *data)
> +{
> +	struct pwm_ir *pwm_ir = data;
> +
> +	for (;;) {
> +		wait_event_idle(pwm_ir->tx_wq,
> +				kthread_should_stop() || pwm_ir->count);
> +
> +		if (kthread_should_stop())
> +			break;
> +
> +		_pwm_ir_tx(pwm_ir);
> +		complete(&pwm_ir->tx_done);
> +	}
> +
> +	return 0;
> +}

Rather than creating a new thread, we could use re-use the process that
is doing the TX, i.e. put this in pwm_ir_tx() itself. That should work,
right?

> +
> +static int pwm_ir_tx(struct rc_dev *dev, unsigned int *txbuf,
> +		     unsigned int count)
> +{
> +	struct pwm_ir *pwm_ir = dev->priv;
> +
> +	pwm_ir->txbuf = txbuf;
> +	pwm_ir->count = count;
> +	pwm_ir->index = 0;
> +
> +	wake_up(&pwm_ir->tx_wq);
> +	wait_for_completion(&pwm_ir->tx_done);
> +
>  	return count;
>  }
>  
> @@ -91,12 +167,24 @@ static int pwm_ir_probe(struct platform_device *pdev)
>  	if (!pwm_ir)
>  		return -ENOMEM;
>  
> +	platform_set_drvdata(pdev, pwm_ir);
> +
>  	pwm_ir->pwm = devm_pwm_get(&pdev->dev, NULL);
>  	if (IS_ERR(pwm_ir->pwm))
>  		return PTR_ERR(pwm_ir->pwm);
>  
> -	pwm_ir->carrier = 38000;
> +	/* Use default, in case userspace does not set the carrier */
> +	pwm_ir->carrier = DIV_ROUND_CLOSEST_ULL(pwm_get_period(pwm_ir->pwm),
> +						NSEC_PER_SEC);
>  	pwm_ir->duty_cycle = 50;
> +	pwm_ir->count = 0;
> +
> +	init_waitqueue_head(&pwm_ir->tx_wq);
> +	init_completion(&pwm_ir->edge);
> +	init_completion(&pwm_ir->tx_done);
> +
> +	hrtimer_init(&pwm_ir->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
> +	pwm_ir->timer.function = pwm_ir_timer_cb;
>  
>  	rcdev = devm_rc_allocate_device(&pdev->dev, RC_DRIVER_IR_RAW_TX);
>  	if (!rcdev)
> @@ -110,14 +198,34 @@ static int pwm_ir_probe(struct platform_device *pdev)
>  	rcdev->s_tx_carrier = pwm_ir_set_carrier;
>  
>  	rc = devm_rc_register_device(&pdev->dev, rcdev);
> -	if (rc < 0)
> +	if (rc < 0) {
>  		dev_err(&pdev->dev, "failed to register rc device\n");
> +		return rc;
> +	}
> +
> +	pwm_ir->tx_thread = kthread_create(pwm_ir_thread, pwm_ir, "%s/tx",
> +					   dev_name(&pdev->dev));
> +	if (IS_ERR(pwm_ir->tx_thread))
> +		return PTR_ERR(pwm_ir->tx_thread);
>  
> -	return rc;
> +	sched_set_fifo(pwm_ir->tx_thread);
> +	wake_up_process(pwm_ir->tx_thread);
> +
> +	return 0;
> +}
> +
> +static int pwm_ir_remove(struct platform_device *pdev)
> +{
> +	struct pwm_ir *pwm_ir = platform_get_drvdata(pdev);
> +
> +	kthread_stop(pwm_ir->tx_thread);
> +
> +	return 0;
>  }
>  
>  static struct platform_driver pwm_ir_driver = {
>  	.probe = pwm_ir_probe,
> +	.remove = pwm_ir_remove,
>  	.driver = {
>  		.name	= DRIVER_NAME,
>  		.of_match_table = pwm_ir_of_match,
> -- 
> 1.9.1