lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <81238735-3727-451c-acc0-703bba7ecff5@gmail.com>
Date:   Fri, 11 Aug 2023 19:39:51 +0300
From:   Péter Ujfalusi <peter.ujfalusi@...il.com>
To:     Tomi Valkeinen <tomi.valkeinen@...asonboard.com>,
        Andrzej Hajda <andrzej.hajda@...el.com>,
        Neil Armstrong <neil.armstrong@...aro.org>,
        Robert Foss <rfoss@...nel.org>,
        Laurent Pinchart <Laurent.pinchart@...asonboard.com>,
        Jonas Karlman <jonas@...boo.se>,
        Jernej Skrabec <jernej.skrabec@...il.com>,
        David Airlie <airlied@...il.com>,
        Daniel Vetter <daniel@...ll.ch>,
        Francesco Dolcini <francesco@...cini.it>
Cc:     dri-devel@...ts.freedesktop.org, linux-kernel@...r.kernel.org,
        Aradhya Bhatia <a-bhatia1@...com>
Subject: Re: [PATCH 10/11] drm/bridge: tc358768: Attempt to fix DSI horizontal
 timings



On 04/08/2023 13:44, Tomi Valkeinen wrote:
> The DSI horizontal timing calculations done by the driver seem to often
> lead to underflows or overflows, depending on the videomode.
> 
> There are two main things the current driver doesn't seem to get right:
> DSI HSW and HFP, and VSDly. However, even following Toshiba's
> documentation it seems we don't always get a working display.
> 
> This patch attempts to fix the horizontal timings for DSI event mode, and
> on a system with a DSI->HDMI encoder, a lot of standard HDMI modes now
> seem to work. The work relies on Toshiba's documentation, but also quite
> a bit on empirical testing.
> 
> This also adds timing related debug prints to make it easier to improve
> on this later.
> 
> The DSI pulse mode has only been tested with a fixed-resolution panel,
> which limits the testing of different modes on DSI pulse mode. However,
> as the VSDly calculation also affects pulse mode, so this might cause a
> regression.

If my memory serves me right we only had this used in a sinlge, static
configuration and again, it might be mentioned in a comment somewhere?

Nice work!

Reviewed-by: Peter Ujfalusi <peter.ujfalusi@...il.com>

> 
> Signed-off-by: Tomi Valkeinen <tomi.valkeinen@...asonboard.com>
> ---
>  drivers/gpu/drm/bridge/tc358768.c | 211 +++++++++++++++++++++++++++++++++-----
>  1 file changed, 183 insertions(+), 28 deletions(-)
> 
> diff --git a/drivers/gpu/drm/bridge/tc358768.c b/drivers/gpu/drm/bridge/tc358768.c
> index dc2241c18dde..ea19de5509ed 100644
> --- a/drivers/gpu/drm/bridge/tc358768.c
> +++ b/drivers/gpu/drm/bridge/tc358768.c
> @@ -9,6 +9,7 @@
>  #include <linux/gpio/consumer.h>
>  #include <linux/i2c.h>
>  #include <linux/kernel.h>
> +#include <linux/math64.h>
>  #include <linux/media-bus-format.h>
>  #include <linux/minmax.h>
>  #include <linux/module.h>
> @@ -157,6 +158,7 @@ struct tc358768_priv {
>  	u32 frs;	/* PLL Freqency range for HSCK (post divider) */
>  
>  	u32 dsiclk;	/* pll_clk / 2 */
> +	u32 pclk;	/* incoming pclk rate */
>  };
>  
>  static inline struct tc358768_priv *dsi_host_to_tc358768(struct mipi_dsi_host
> @@ -380,6 +382,7 @@ static int tc358768_calc_pll(struct tc358768_priv *priv,
>  	priv->prd = best_prd;
>  	priv->frs = frs;
>  	priv->dsiclk = best_pll / 2;
> +	priv->pclk = mode->clock * 1000;
>  
>  	return 0;
>  }
> @@ -638,6 +641,28 @@ static u32 tc358768_ps_to_ns(u32 ps)
>  	return ps / 1000;
>  }
>  
> +static u32 tc358768_dpi_to_ns(u32 val, u32 pclk)
> +{
> +	return (u32)div_u64((u64)val * NANO, pclk);
> +}
> +
> +/* Convert value in DPI pixel clock units to DSI byte count */
> +static u32 tc358768_dpi_to_dsi_bytes(struct tc358768_priv *priv, u32 val)
> +{
> +	u64 m = (u64)val * priv->dsiclk / 4 * priv->dsi_lanes;
> +	u64 n = priv->pclk;
> +
> +	return (u32)div_u64(m + n - 1, n);
> +}
> +
> +static u32 tc358768_dsi_bytes_to_ns(struct tc358768_priv *priv, u32 val)
> +{
> +	u64 m = (u64)val * NANO;
> +	u64 n = priv->dsiclk / 4 * priv->dsi_lanes;
> +
> +	return (u32)div_u64(m, n);
> +}
> +
>  static void tc358768_bridge_pre_enable(struct drm_bridge *bridge)
>  {
>  	struct tc358768_priv *priv = bridge_to_tc358768(bridge);
> @@ -647,11 +672,19 @@ static void tc358768_bridge_pre_enable(struct drm_bridge *bridge)
>  	s32 raw_val;
>  	const struct drm_display_mode *mode;
>  	u32 hsbyteclk_ps, dsiclk_ps, ui_ps;
> -	u32 dsiclk, hsbyteclk, video_start;
> -	const u32 internal_delay = 40;
> +	u32 dsiclk, hsbyteclk;
>  	int ret, i;
>  	struct videomode vm;
>  	struct device *dev = priv->dev;
> +	/* In pixelclock units */
> +	u32 dpi_htot, dpi_data_start;
> +	/* In byte units */
> +	u32 dsi_dpi_htot, dsi_dpi_data_start;
> +	u32 dsi_hsw, dsi_hbp, dsi_hact, dsi_hfp;
> +	const u32 dsi_hss = 4; /* HSS is a short packet (4 bytes) */
> +	/* In hsbyteclk units */
> +	u32 dsi_vsdly;
> +	const u32 internal_dly = 40;
>  
>  	if (mode_flags & MIPI_DSI_CLOCK_NON_CONTINUOUS) {
>  		dev_warn_once(dev, "Non-continuous mode unimplemented, falling back to continuous\n");
> @@ -686,27 +719,23 @@ static void tc358768_bridge_pre_enable(struct drm_bridge *bridge)
>  	case MIPI_DSI_FMT_RGB888:
>  		val |= (0x3 << 4);
>  		hact = vm.hactive * 3;
> -		video_start = (vm.hsync_len + vm.hback_porch) * 3;
>  		data_type = MIPI_DSI_PACKED_PIXEL_STREAM_24;
>  		break;
>  	case MIPI_DSI_FMT_RGB666:
>  		val |= (0x4 << 4);
>  		hact = vm.hactive * 3;
> -		video_start = (vm.hsync_len + vm.hback_porch) * 3;
>  		data_type = MIPI_DSI_PACKED_PIXEL_STREAM_18;
>  		break;
>  
>  	case MIPI_DSI_FMT_RGB666_PACKED:
>  		val |= (0x4 << 4) | BIT(3);
>  		hact = vm.hactive * 18 / 8;
> -		video_start = (vm.hsync_len + vm.hback_porch) * 18 / 8;
>  		data_type = MIPI_DSI_PIXEL_STREAM_3BYTE_18;
>  		break;
>  
>  	case MIPI_DSI_FMT_RGB565:
>  		val |= (0x5 << 4);
>  		hact = vm.hactive * 2;
> -		video_start = (vm.hsync_len + vm.hback_porch) * 2;
>  		data_type = MIPI_DSI_PACKED_PIXEL_STREAM_16;
>  		break;
>  	default:
> @@ -716,9 +745,150 @@ static void tc358768_bridge_pre_enable(struct drm_bridge *bridge)
>  		return;
>  	}
>  
> +	/*
> +	 * There are three important things to make TC358768 work correctly,
> +	 * which are not trivial to manage:
> +	 *
> +	 * 1. Keep the DPI line-time and the DSI line-time as close to each
> +	 *    other as possible.
> +	 * 2. TC358768 goes to LP mode after each line's active area. The DSI
> +	 *    HFP period has to be long enough for entering and exiting LP mode.
> +	 *    But it is not clear how to calculate this.
> +	 * 3. VSDly (video start delay) has to be long enough to ensure that the
> +	 *    DSI TX does not start transmitting util we have started receiving
> +	 *    pixel data from the DPI input. It is not clear how to calculate
> +	 *    this either.
> +	 */
> +
> +	dpi_htot = vm.hactive + vm.hfront_porch + vm.hsync_len + vm.hback_porch;
> +	dpi_data_start = vm.hsync_len + vm.hback_porch;
> +
> +	dev_dbg(dev, "dpi horiz timing (pclk): %u + %u + %u + %u = %u\n",
> +		vm.hsync_len, vm.hback_porch, vm.hactive, vm.hfront_porch,
> +		dpi_htot);
> +
> +	dev_dbg(dev, "dpi horiz timing (ns): %u + %u + %u + %u = %u\n",
> +		tc358768_dpi_to_ns(vm.hsync_len, vm.pixelclock),
> +		tc358768_dpi_to_ns(vm.hback_porch, vm.pixelclock),
> +		tc358768_dpi_to_ns(vm.hactive, vm.pixelclock),
> +		tc358768_dpi_to_ns(vm.hfront_porch, vm.pixelclock),
> +		tc358768_dpi_to_ns(dpi_htot, vm.pixelclock));
> +
> +	dev_dbg(dev, "dpi data start (ns): %u + %u = %u\n",
> +		tc358768_dpi_to_ns(vm.hsync_len, vm.pixelclock),
> +		tc358768_dpi_to_ns(vm.hback_porch, vm.pixelclock),
> +		tc358768_dpi_to_ns(dpi_data_start, vm.pixelclock));
> +
> +	dsi_dpi_htot = tc358768_dpi_to_dsi_bytes(priv, dpi_htot);
> +	dsi_dpi_data_start = tc358768_dpi_to_dsi_bytes(priv, dpi_data_start);
> +
> +	if (dsi_dev->mode_flags & MIPI_DSI_MODE_VIDEO_SYNC_PULSE) {
> +		dsi_hsw = tc358768_dpi_to_dsi_bytes(priv, vm.hsync_len);
> +		dsi_hbp = tc358768_dpi_to_dsi_bytes(priv, vm.hback_porch);
> +	} else {
> +		/* HBP is included in HSW in event mode */
> +		dsi_hbp = 0;
> +		dsi_hsw = tc358768_dpi_to_dsi_bytes(priv,
> +			vm.hsync_len + vm.hback_porch);
> +
> +		/*
> +		 * The pixel packet includes the actual pixel data, and:
> +		 * DSI packet header = 4 bytes
> +		 * DCS code = 1 byte
> +		 * DSI packet footer = 2 bytes
> +		 */
> +		dsi_hact = hact + 4 + 1 + 2;
> +
> +		dsi_hfp = dsi_dpi_htot - dsi_hact - dsi_hsw - dsi_hss;
> +
> +		/*
> +		 * Here we should check if HFP is long enough for entering LP
> +		 * and exiting LP, but it's not clear how to calculate that.
> +		 * Instead, this is a naive algorithm that just adjusts the HFP
> +		 * and HSW so that HFP is (at least) roughly 2/3 of the total
> +		 * blanking time.
> +		 */
> +		if (dsi_hfp < (dsi_hfp + dsi_hsw + dsi_hss) * 2 / 3) {
> +			u32 old_hfp = dsi_hfp;
> +			u32 old_hsw = dsi_hsw;
> +			u32 tot = dsi_hfp + dsi_hsw + dsi_hss;
> +
> +			dsi_hsw = tot / 3;
> +
> +			/*
> +			 * Seems like sometimes HSW has to be divisible by num-lanes, but
> +			 * not always...
> +			 */
> +			dsi_hsw = roundup(dsi_hsw, priv->dsi_lanes);
> +
> +			dsi_hfp = dsi_dpi_htot - dsi_hact - dsi_hsw - dsi_hss;
> +
> +			dev_dbg(dev,
> +				"hfp too short, adjusting dsi hfp and dsi hsw from %u, %u to %u, %u\n",
> +				old_hfp, old_hsw, dsi_hfp, dsi_hsw);
> +		}
> +
> +		dev_dbg(dev,
> +			"dsi horiz timing (bytes): %u, %u + %u + %u + %u = %u\n",
> +			dsi_hss, dsi_hsw, dsi_hbp, dsi_hact, dsi_hfp,
> +			dsi_hss + dsi_hsw + dsi_hbp + dsi_hact + dsi_hfp);
> +
> +		dev_dbg(dev, "dsi horiz timing (ns): %u + %u + %u + %u + %u = %u\n",
> +			tc358768_dsi_bytes_to_ns(priv, dsi_hss),
> +			tc358768_dsi_bytes_to_ns(priv, dsi_hsw),
> +			tc358768_dsi_bytes_to_ns(priv, dsi_hbp),
> +			tc358768_dsi_bytes_to_ns(priv, dsi_hact),
> +			tc358768_dsi_bytes_to_ns(priv, dsi_hfp),
> +			tc358768_dsi_bytes_to_ns(priv, dsi_hss + dsi_hsw + dsi_hbp + dsi_hact + dsi_hfp));
> +	}
> +
> +	/* VSDly calculation */
> +
> +	/* Start with the HW internal delay */
> +	dsi_vsdly = internal_dly;
> +
> +	/* Convert to byte units as the other variables are in byte units */
> +	dsi_vsdly *= priv->dsi_lanes;
> +
> +	/* Do we need more delay, in addition to the internal? */
> +	if (dsi_dpi_data_start > dsi_vsdly + dsi_hss + dsi_hsw + dsi_hbp) {
> +		dsi_vsdly = dsi_dpi_data_start - dsi_hss - dsi_hsw - dsi_hbp;
> +		dsi_vsdly = roundup(dsi_vsdly, priv->dsi_lanes);
> +	}
> +
> +	dev_dbg(dev, "dsi data start (bytes) %u + %u + %u + %u = %u\n",
> +		dsi_vsdly, dsi_hss, dsi_hsw, dsi_hbp,
> +		dsi_vsdly + dsi_hss + dsi_hsw + dsi_hbp);
> +
> +	dev_dbg(dev, "dsi data start (ns) %u + %u + %u + %u = %u\n",
> +		tc358768_dsi_bytes_to_ns(priv, dsi_vsdly),
> +		tc358768_dsi_bytes_to_ns(priv, dsi_hss),
> +		tc358768_dsi_bytes_to_ns(priv, dsi_hsw),
> +		tc358768_dsi_bytes_to_ns(priv, dsi_hbp),
> +		tc358768_dsi_bytes_to_ns(priv, dsi_vsdly + dsi_hss + dsi_hsw + dsi_hbp));
> +
> +	/* Convert back to hsbyteclk */
> +	dsi_vsdly /= priv->dsi_lanes;
> +
> +	/*
> +	 * The docs say that there is an internal delay of 40 cycles.
> +	 * However, we get underflows if we follow that rule. If we
> +	 * instead ignore the internal delay, things work. So either
> +	 * the docs are wrong or the calculations are wrong.
> +	 *
> +	 * As a temporary fix, add the internal delay here, to counter
> +	 * the subtraction when writing the register.
> +	 */
> +	dsi_vsdly += internal_dly;
> +
> +	/* Clamp to the register max */
> +	if (dsi_vsdly - internal_dly > 0x3ff) {
> +		dev_warn(dev, "VSDly too high, underflows likely\n");
> +		dsi_vsdly = 0x3ff + internal_dly;
> +	}
> +
>  	/* VSDly[9:0] */
> -	video_start = max(video_start, internal_delay + 1) - internal_delay;
> -	tc358768_write(priv, TC358768_VSDLY, video_start);
> +	tc358768_write(priv, TC358768_VSDLY, dsi_vsdly - internal_dly);
>  
>  	tc358768_write(priv, TC358768_DATAFMT, val);
>  	tc358768_write(priv, TC358768_DSITX_DT, data_type);
> @@ -826,18 +996,6 @@ static void tc358768_bridge_pre_enable(struct drm_bridge *bridge)
>  
>  		/* vbp */
>  		tc358768_write(priv, TC358768_DSI_VBPR, vm.vback_porch);
> -
> -		/* hsw * byteclk * ndl / pclk */
> -		val = (u32)div_u64(vm.hsync_len *
> -				   (u64)hsbyteclk * priv->dsi_lanes,
> -				   vm.pixelclock);
> -		tc358768_write(priv, TC358768_DSI_HSW, val);
> -
> -		/* hbp * byteclk * ndl / pclk */
> -		val = (u32)div_u64(vm.hback_porch *
> -				   (u64)hsbyteclk * priv->dsi_lanes,
> -				   vm.pixelclock);
> -		tc358768_write(priv, TC358768_DSI_HBPR, val);
>  	} else {
>  		/* Set event mode */
>  		tc358768_write(priv, TC358768_DSI_EVENT, 1);
> @@ -851,16 +1009,13 @@ static void tc358768_bridge_pre_enable(struct drm_bridge *bridge)
>  
>  		/* vbp (not used in event mode) */
>  		tc358768_write(priv, TC358768_DSI_VBPR, 0);
> +	}
>  
> -		/* (hsw + hbp) * byteclk * ndl / pclk */
> -		val = (u32)div_u64((vm.hsync_len + vm.hback_porch) *
> -				   (u64)hsbyteclk * priv->dsi_lanes,
> -				   vm.pixelclock);
> -		tc358768_write(priv, TC358768_DSI_HSW, val);
> +	/* hsw (bytes) */
> +	tc358768_write(priv, TC358768_DSI_HSW, dsi_hsw);
>  
> -		/* hbp (not used in event mode) */
> -		tc358768_write(priv, TC358768_DSI_HBPR, 0);
> -	}
> +	/* hbp (bytes) */
> +	tc358768_write(priv, TC358768_DSI_HBPR, dsi_hbp);
>  
>  	/* hact (bytes) */
>  	tc358768_write(priv, TC358768_DSI_HACT, hact);
> 

-- 
Péter

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ