lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <e2c590a8-b77a-4878-891c-3e0f977e2d09@collabora.com>
Date: Tue, 23 Sep 2025 14:57:41 +0200
From: Benjamin Gaignard <benjamin.gaignard@...labora.com>
To: Nicolas Dufresne <nicolas.dufresne@...labora.com>,
 Philipp Zabel <p.zabel@...gutronix.de>,
 Mauro Carvalho Chehab <mchehab@...nel.org>, Shawn Guo <shawnguo@...nel.org>,
 Sascha Hauer <s.hauer@...gutronix.de>,
 Pengutronix Kernel Team <kernel@...gutronix.de>,
 Fabio Estevam <festevam@...il.com>, Jernej Skrabec
 <jernej.skrabec@...il.com>, Hans Verkuil <hverkuil@...nel.org>,
 Ezequiel Garcia <ezequiel@...guardiasur.com.ar>
Cc: Mauro Carvalho Chehab <mchehab+huawei@...nel.org>,
 linux-media@...r.kernel.org, linux-rockchip@...ts.infradead.org,
 linux-kernel@...r.kernel.org, imx@...ts.linux.dev,
 linux-arm-kernel@...ts.infradead.org, kernel@...labora.com,
 Stable@...r.kernel.org
Subject: Re: [PATCH 1/2] media: verisilicon: Fix CPU stalls on G2 bus error


Le 22/09/2025 à 20:43, Nicolas Dufresne a écrit :
> In some seek stress tests, we are getting IRQ from the G2 decoder where
> the dec_bus_int and the dec_e bits are high, meaning the decoder is
> still running despite the error.
>
> Fix this by reworking the IRQ handler to only finish the job once we
> have reached completion and move the software reset to when our software
> watchdog triggers.
>
> This way, we let the hardware continue on errors when it did not self
> reset and in worse case scenario the hardware timeout will
> automatically stop it. The actual error will be fixed in a follow up
> patch.
>
> Fixes: 3385c514ecc5a ("media: hantro: Convert imx8m_vpu_g2_irq to helper")
> Signed-off-by: Nicolas Dufresne <nicolas.dufresne@...labora.com>

Reviewed-by: Benjamin Gaignard <benjamin.gaignard@...labora.com>

> ---
>   drivers/media/platform/verisilicon/hantro_g2.c     | 88 +++++++++++++++++-----
>   .../platform/verisilicon/hantro_g2_hevc_dec.c      |  2 -
>   .../media/platform/verisilicon/hantro_g2_regs.h    | 13 ++++
>   .../media/platform/verisilicon/hantro_g2_vp9_dec.c |  2 -
>   drivers/media/platform/verisilicon/hantro_hw.h     |  1 +
>   drivers/media/platform/verisilicon/imx8m_vpu_hw.c  |  2 +
>   6 files changed, 85 insertions(+), 23 deletions(-)
>
> diff --git a/drivers/media/platform/verisilicon/hantro_g2.c b/drivers/media/platform/verisilicon/hantro_g2.c
> index aae0b562fabb1732b08bcd88b9007749a7849ba6..318673b66da884b89f31777dd8e69a934080cebf 100644
> --- a/drivers/media/platform/verisilicon/hantro_g2.c
> +++ b/drivers/media/platform/verisilicon/hantro_g2.c
> @@ -5,43 +5,93 @@
>    * Copyright (C) 2021 Collabora Ltd, Andrzej Pietrasiewicz <andrzej.p@...labora.com>
>    */
>   
> +#include <linux/delay.h>
>   #include "hantro_hw.h"
>   #include "hantro_g2_regs.h"
>   
>   #define G2_ALIGN	16
>   
> -void hantro_g2_check_idle(struct hantro_dev *vpu)
> +static bool hantro_g2_active(struct hantro_ctx *ctx)
>   {
> -	int i;
> -
> -	for (i = 0; i < 3; i++) {
> -		u32 status;
> -
> -		/* Make sure the VPU is idle */
> -		status = vdpu_read(vpu, G2_REG_INTERRUPT);
> -		if (status & G2_REG_INTERRUPT_DEC_E) {
> -			dev_warn(vpu->dev, "device still running, aborting");
> -			status |= G2_REG_INTERRUPT_DEC_ABORT_E | G2_REG_INTERRUPT_DEC_IRQ_DIS;
> -			vdpu_write(vpu, status, G2_REG_INTERRUPT);
> -		}
> +	struct hantro_dev *vpu = ctx->dev;
> +	u32 status;
> +
> +	status = vdpu_read(vpu, G2_REG_INTERRUPT);
> +
> +	return (status & G2_REG_INTERRUPT_DEC_E);
> +}
> +
> +/**
> + * hantro_g2_reset:
> + * @ctx: the hantro context
> + *
> + * Emulates a reset using Hantro abort function. Failing this procedure would
> + * results in programming a running IP which leads to CPU hang.
> + *
> + * Using a hard reset procedure instead is prefferred.
> + */
> +void hantro_g2_reset(struct hantro_ctx *ctx)
> +{
> +	struct hantro_dev *vpu = ctx->dev;
> +	u32 status;
> +
> +	status = vdpu_read(vpu, G2_REG_INTERRUPT);
> +	if (status & G2_REG_INTERRUPT_DEC_E) {
> +		dev_warn_ratelimited(vpu->dev, "device still running, aborting");
> +		status |= G2_REG_INTERRUPT_DEC_ABORT_E | G2_REG_INTERRUPT_DEC_IRQ_DIS;
> +		vdpu_write(vpu, status, G2_REG_INTERRUPT);
> +
> +		do {
> +			mdelay(1);
> +		} while (hantro_g2_active(ctx));
>   	}
>   }
>   
>   irqreturn_t hantro_g2_irq(int irq, void *dev_id)
>   {
>   	struct hantro_dev *vpu = dev_id;
> -	enum vb2_buffer_state state;
>   	u32 status;
>   
>   	status = vdpu_read(vpu, G2_REG_INTERRUPT);
> -	state = (status & G2_REG_INTERRUPT_DEC_RDY_INT) ?
> -		 VB2_BUF_STATE_DONE : VB2_BUF_STATE_ERROR;
>   
> -	vdpu_write(vpu, 0, G2_REG_INTERRUPT);
> -	vdpu_write(vpu, G2_REG_CONFIG_DEC_CLK_GATE_E, G2_REG_CONFIG);
> +	if (!(status & G2_REG_INTERRUPT_DEC_IRQ))
> +		return IRQ_NONE;
> +
> +	hantro_reg_write(vpu, &g2_dec_irq, 0);
> +	hantro_reg_write(vpu, &g2_dec_int_stat, 0);
> +	hantro_reg_write(vpu, &g2_clk_gate_e, 1);
> +
> +	if (status & G2_REG_INTERRUPT_DEC_RDY_INT) {
> +		hantro_irq_done(vpu, VB2_BUF_STATE_DONE);
> +		return IRQ_HANDLED;
> +	}
> +
> +	if (status & G2_REG_INTERRUPT_DEC_ABORT_INT) {
> +		/* disabled on abort, though lets be safe and handle it */
> +		dev_warn_ratelimited(vpu->dev, "decode operation aborted.");
> +		return IRQ_HANDLED;
> +	}
> +
> +	if (status & G2_REG_INTERRUPT_DEC_LAST_SLICE_INT)
> +		dev_warn_ratelimited(vpu->dev, "not all macroblocks were decoded.");
> +
> +	if (status & G2_REG_INTERRUPT_DEC_BUS_INT)
> +		dev_warn_ratelimited(vpu->dev, "bus error detected.");
> +
> +	if (status & G2_REG_INTERRUPT_DEC_ERROR_INT)
> +		dev_warn_ratelimited(vpu->dev, "decode error detected.");
> +
> +	if (status & G2_REG_INTERRUPT_DEC_TIMEOUT)
> +		dev_warn_ratelimited(vpu->dev, "frame decode timed out.");
>   
> -	hantro_irq_done(vpu, state);
> +	/**
> +	 * If the decoding haven't stopped, let it continue. The hardware timeout
> +	 * will trigger if it is trully stuck.
> +	 */
> +	if (status & G2_REG_INTERRUPT_DEC_E)
> +		return IRQ_HANDLED;
>   
> +	hantro_irq_done(vpu, VB2_BUF_STATE_ERROR);
>   	return IRQ_HANDLED;
>   }
>   
> diff --git a/drivers/media/platform/verisilicon/hantro_g2_hevc_dec.c b/drivers/media/platform/verisilicon/hantro_g2_hevc_dec.c
> index 0e212198dd65b1cc27770ca93b14aa96e2772ac4..f066636e56f98560d9b1c5036691e3c34dd13b1f 100644
> --- a/drivers/media/platform/verisilicon/hantro_g2_hevc_dec.c
> +++ b/drivers/media/platform/verisilicon/hantro_g2_hevc_dec.c
> @@ -582,8 +582,6 @@ int hantro_g2_hevc_dec_run(struct hantro_ctx *ctx)
>   	struct hantro_dev *vpu = ctx->dev;
>   	int ret;
>   
> -	hantro_g2_check_idle(vpu);
> -
>   	/* Prepare HEVC decoder context. */
>   	ret = hantro_hevc_dec_prepare_run(ctx);
>   	if (ret)
> diff --git a/drivers/media/platform/verisilicon/hantro_g2_regs.h b/drivers/media/platform/verisilicon/hantro_g2_regs.h
> index b943b1816db7fd11eee063ecd7b082a5ed6c6f90..c614951121c79cf5c3bd4d086e596f60fba12459 100644
> --- a/drivers/media/platform/verisilicon/hantro_g2_regs.h
> +++ b/drivers/media/platform/verisilicon/hantro_g2_regs.h
> @@ -22,7 +22,14 @@
>   #define G2_REG_VERSION			G2_SWREG(0)
>   
>   #define G2_REG_INTERRUPT		G2_SWREG(1)
> +#define G2_REG_INTERRUPT_DEC_LAST_SLICE_INT	BIT(19)
> +#define G2_REG_INTERRUPT_DEC_TIMEOUT	BIT(18)
> +#define G2_REG_INTERRUPT_DEC_ERROR_INT	BIT(16)
> +#define G2_REG_INTERRUPT_DEC_BUF_INT	BIT(14)
> +#define G2_REG_INTERRUPT_DEC_BUS_INT	BIT(13)
>   #define G2_REG_INTERRUPT_DEC_RDY_INT	BIT(12)
> +#define G2_REG_INTERRUPT_DEC_ABORT_INT	BIT(11)
> +#define G2_REG_INTERRUPT_DEC_IRQ	BIT(8)
>   #define G2_REG_INTERRUPT_DEC_ABORT_E	BIT(5)
>   #define G2_REG_INTERRUPT_DEC_IRQ_DIS	BIT(4)
>   #define G2_REG_INTERRUPT_DEC_E		BIT(0)
> @@ -35,6 +42,9 @@
>   #define BUS_WIDTH_128			2
>   #define BUS_WIDTH_256			3
>   
> +#define g2_dec_int_stat		G2_DEC_REG(1, 11, 0xf)
> +#define g2_dec_irq		G2_DEC_REG(1, 8, 0x1)
> +
>   #define g2_strm_swap		G2_DEC_REG(2, 28, 0xf)
>   #define g2_strm_swap_old	G2_DEC_REG(2, 27, 0x1f)
>   #define g2_pic_swap		G2_DEC_REG(2, 22, 0x1f)
> @@ -225,6 +235,9 @@
>   #define vp9_filt_level_seg5	G2_DEC_REG(19,  8, 0x3f)
>   #define vp9_quant_seg5		G2_DEC_REG(19,  0, 0xff)
>   
> +#define g2_timemout_override_e	G2_DEC_REG(45, 31, 0x1)
> +#define g2_timemout_cycles	G2_DEC_REG(45, 0, 0x7fffffff)
> +
>   #define hevc_cur_poc_00		G2_DEC_REG(46, 24, 0xff)
>   #define hevc_cur_poc_01		G2_DEC_REG(46, 16, 0xff)
>   #define hevc_cur_poc_02		G2_DEC_REG(46, 8,  0xff)
> diff --git a/drivers/media/platform/verisilicon/hantro_g2_vp9_dec.c b/drivers/media/platform/verisilicon/hantro_g2_vp9_dec.c
> index 82a478ac645e1daf730bf7aa53f825508556d3b2..56c79e339030e8076c386d7c48d976e102971d6d 100644
> --- a/drivers/media/platform/verisilicon/hantro_g2_vp9_dec.c
> +++ b/drivers/media/platform/verisilicon/hantro_g2_vp9_dec.c
> @@ -893,8 +893,6 @@ int hantro_g2_vp9_dec_run(struct hantro_ctx *ctx)
>   	struct vb2_v4l2_buffer *dst;
>   	int ret;
>   
> -	hantro_g2_check_idle(ctx->dev);
> -
>   	ret = start_prepare_run(ctx, &decode_params);
>   	if (ret) {
>   		hantro_end_prepare_run(ctx);
> diff --git a/drivers/media/platform/verisilicon/hantro_hw.h b/drivers/media/platform/verisilicon/hantro_hw.h
> index c9b6556f8b2b78b94e11c6f1374b395ca02f16ff..5f2011529f02dc6f8260f10a579bf1cd4e6c2edf 100644
> --- a/drivers/media/platform/verisilicon/hantro_hw.h
> +++ b/drivers/media/platform/verisilicon/hantro_hw.h
> @@ -583,6 +583,7 @@ void hantro_g2_vp9_dec_done(struct hantro_ctx *ctx);
>   int hantro_vp9_dec_init(struct hantro_ctx *ctx);
>   void hantro_vp9_dec_exit(struct hantro_ctx *ctx);
>   void hantro_g2_check_idle(struct hantro_dev *vpu);
> +void hantro_g2_reset(struct hantro_ctx *ctx);
>   irqreturn_t hantro_g2_irq(int irq, void *dev_id);
>   
>   #endif /* HANTRO_HW_H_ */
> diff --git a/drivers/media/platform/verisilicon/imx8m_vpu_hw.c b/drivers/media/platform/verisilicon/imx8m_vpu_hw.c
> index f9f276385c11786c4fda9f02f71ec699f57b84f5..5be0e2e76882f1e21359d3e7cf7f6213ee728ea5 100644
> --- a/drivers/media/platform/verisilicon/imx8m_vpu_hw.c
> +++ b/drivers/media/platform/verisilicon/imx8m_vpu_hw.c
> @@ -294,11 +294,13 @@ static const struct hantro_codec_ops imx8mq_vpu_g1_codec_ops[] = {
>   static const struct hantro_codec_ops imx8mq_vpu_g2_codec_ops[] = {
>   	[HANTRO_MODE_HEVC_DEC] = {
>   		.run = hantro_g2_hevc_dec_run,
> +		.reset = hantro_g2_reset,
>   		.init = hantro_hevc_dec_init,
>   		.exit = hantro_hevc_dec_exit,
>   	},
>   	[HANTRO_MODE_VP9_DEC] = {
>   		.run = hantro_g2_vp9_dec_run,
> +		.reset = hantro_g2_reset,
>   		.done = hantro_g2_vp9_dec_done,
>   		.init = hantro_vp9_dec_init,
>   		.exit = hantro_vp9_dec_exit,
>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ