linux-kernel - Re: [PATCH 5/5] media: iris: Add internal buffer calculation for AV1 decoder

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <ec3e93e72e326db4e61fed33ade0547935ab6dca.camel@ndufresne.ca>
Date: Thu, 02 Oct 2025 08:38:00 -0400
From: Nicolas Dufresne <nicolas@...fresne.ca>
To: Bryan O'Donoghue <bod@...nel.org>, Deepa Guthyappa Madivalara	
 <deepa.madivalara@....qualcomm.com>, Mauro Carvalho Chehab
 <mchehab@...nel.org>,  Vikash Garodia <vikash.garodia@....qualcomm.com>,
 Dikshita Agarwal <dikshita.agarwal@....qualcomm.com>,  Abhinav Kumar
 <abhinav.kumar@...ux.dev>
Cc: linux-media@...r.kernel.org, linux-kernel@...r.kernel.org, 
	linux-arm-msm@...r.kernel.org
Subject: Re: [PATCH 5/5] media: iris: Add internal buffer calculation for
 AV1 decoder

Hi,

Le jeudi 02 octobre 2025 à 00:30 +0100, Bryan O'Donoghue a écrit :
> On 01/10/2025 20:00, Deepa Guthyappa Madivalara wrote:
> > Implement internal buffer count and size calculations for AV1 decoder.
> > 
> > Signed-off-by: Deepa Guthyappa Madivalara <deepa.madivalara@....qualcomm.com>
> > ---
> >   drivers/media/platform/qcom/iris/iris_buffer.h     |   1 +
> >   .../platform/qcom/iris/iris_hfi_gen2_command.c     |   1 -
> >   drivers/media/platform/qcom/iris/iris_vpu_buffer.c | 255 ++++++++++++++++++++-
> >   drivers/media/platform/qcom/iris/iris_vpu_buffer.h | 105 +++++++++
> >   4 files changed, 357 insertions(+), 5 deletions(-)
> > 
> > diff --git a/drivers/media/platform/qcom/iris/iris_buffer.h b/drivers/media/platform/qcom/iris/iris_buffer.h
> > index 5ef365d9236c7cbdee24a4614789b3191881968b..75bb767761824c4c02e0df9b765896cc093be333 100644
> > --- a/drivers/media/platform/qcom/iris/iris_buffer.h
> > +++ b/drivers/media/platform/qcom/iris/iris_buffer.h
> > @@ -27,6 +27,7 @@ struct iris_inst;
> >    * @BUF_SCRATCH_1: buffer to store decoding/encoding context data for HW
> >    * @BUF_SCRATCH_2: buffer to store encoding context data for HW
> >    * @BUF_VPSS: buffer to store VPSS context data for HW
> > + * @BUF_PARTIAL: buffer for AV1 IBC data
> >    * @BUF_TYPE_MAX: max buffer types
> >    */
> >   enum iris_buffer_type {
> > diff --git a/drivers/media/platform/qcom/iris/iris_hfi_gen2_command.c b/drivers/media/platform/qcom/iris/iris_hfi_gen2_command.c
> > index e3a8b031b3f191a6d18e1084db34804a8172439c..000bf75ba74ace5e10585910cda02975b0c34304 100644
> > --- a/drivers/media/platform/qcom/iris/iris_hfi_gen2_command.c
> > +++ b/drivers/media/platform/qcom/iris/iris_hfi_gen2_command.c
> > @@ -488,7 +488,6 @@ static int iris_hfi_gen2_set_linear_stride_scanline(struct iris_inst *inst, u32
> > 
> >   static int iris_hfi_gen2_set_tier(struct iris_inst *inst, u32 plane)
> >   {
> > -	struct iris_inst_hfi_gen2 *inst_hfi_gen2 = to_iris_inst_hfi_gen2(inst);
> >   	u32 port = iris_hfi_gen2_get_port(inst, V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE);
> >   	u32 tier = inst->fw_caps[TIER].value;
> > 
> > diff --git a/drivers/media/platform/qcom/iris/iris_vpu_buffer.c b/drivers/media/platform/qcom/iris/iris_vpu_buffer.c
> > index 4463be05ce165adef6b152eb0c155d2e6a7b3c36..17d3a7ae79e994257d596906cb4c17250a11a0cb 100644
> > --- a/drivers/media/platform/qcom/iris/iris_vpu_buffer.c
> > +++ b/drivers/media/platform/qcom/iris/iris_vpu_buffer.c
> > @@ -9,6 +9,14 @@
> >   #include "iris_hfi_gen2_defines.h"
> > 
> >   #define HFI_MAX_COL_FRAME 6
> > +#define HFI_COLOR_FORMAT_YUV420_NV12_UBWC_Y_TILE_HEIGHT (8)
> > +#define HFI_COLOR_FORMAT_YUV420_NV12_UBWC_Y_TILE_WIDTH (32)
> > +#define HFI_COLOR_FORMAT_YUV420_NV12_UBWC_UV_TILE_HEIGHT (8)
> > +#define HFI_COLOR_FORMAT_YUV420_NV12_UBWC_UV_TILE_WIDTH (16)
> > +#define HFI_COLOR_FORMAT_YUV420_TP10_UBWC_Y_TILE_HEIGHT (4)
> > +#define HFI_COLOR_FORMAT_YUV420_TP10_UBWC_Y_TILE_WIDTH (48)
> > +#define HFI_COLOR_FORMAT_YUV420_TP10_UBWC_UV_TILE_HEIGHT (4)
> > +#define HFI_COLOR_FORMAT_YUV420_TP10_UBWC_UV_TILE_WIDTH (24)
> > 
> >   #ifndef SYSTEM_LAL_TILE10
> >   #define SYSTEM_LAL_TILE10 192
> > @@ -39,6 +47,31 @@ static u32 hfi_buffer_bin_h264d(u32 frame_width, u32 frame_height, u32 num_vpp_p
> >   	return size_h264d_hw_bin_buffer(n_aligned_w, n_aligned_h, num_vpp_pipes);
> >   }
> > 
> > +static u32 size_av1d_hw_bin_buffer(u32 frame_width, u32 frame_height, u32 num_vpp_pipes)
> > +{
> > +	u32 size_yuv, size_bin_hdr, size_bin_res;
> > +
> > +	size_yuv = ((frame_width * frame_height) <= BIN_BUFFER_THRESHOLD) ?
> > +		((BIN_BUFFER_THRESHOLD * 3) >> 1) :
> > +		((frame_width * frame_height * 3) >> 1);
> > +	size_bin_hdr = size_yuv * AV1_CABAC_HDR_RATIO_HD_TOT;
> > +	size_bin_res = size_yuv * AV1_CABAC_RES_RATIO_HD_TOT;
> > +	size_bin_hdr = ALIGN(size_bin_hdr / num_vpp_pipes,
> > +			     DMA_ALIGNMENT) * num_vpp_pipes;
> > +	size_bin_res = ALIGN(size_bin_res / num_vpp_pipes,
> > +			     DMA_ALIGNMENT) * num_vpp_pipes;
> > +
> > +	return size_bin_hdr + size_bin_res;
> > +}
> > +
> > +static u32 hfi_buffer_bin_av1d(u32 frame_width, u32 frame_height, u32 num_vpp_pipes)
> > +{
> > +	u32 n_aligned_h = ALIGN(frame_height, 16);
> > +	u32 n_aligned_w = ALIGN(frame_width, 16);
> > +
> > +	return size_av1d_hw_bin_buffer(n_aligned_w, n_aligned_h, num_vpp_pipes);
> > +}
> > +
> >   static u32 size_h265d_hw_bin_buffer(u32 frame_width, u32 frame_height, u32 num_vpp_pipes)
> >   {
> >   	u32 product = frame_width * frame_height;
> > @@ -110,6 +143,20 @@ static u32 hfi_buffer_comv_h265d(u32 frame_width, u32 frame_height, u32 _comv_bu
> >   	return (_size * (_comv_bufcount)) + 512;
> >   }
> 
> What's this alignment stuffed onto the end about ?
> 
> Please guys give these magic numbers meaningful names.

That would be nice, then I'll be able to document that for Hantro AV1 decoder
too. It was assumed when we picked the driver that the table was hardware
specific, but if its not, a drivers/media/v4l2-core/v4l2-av1.c is welcome.

>> drivers/media/platform/verisilicon/hantro_hw.h:555 
static inline size_t
hantro_av1_mv_size(unsigned int width, unsigned int height)
{
	size_t num_sbs = hantro_av1_num_sbs(width) * hantro_av1_num_sbs(height);

	return ALIGN(num_sbs * 384, 16) * 2 + 512;
}

> 
> > +static u32 hfi_buffer_comv_av1d(u32 frame_width, u32 frame_height, u32 comv_bufcount)
> > +{
> > +	u32 size;
> > +
> > +	size =  2 * ALIGN(MAX(((frame_width + 63) / 64) *
> > +		((frame_height + 63) / 64) * 512,

This looks like div_round_up()  ?

> > +		((frame_width + 127) / 128) *
> > +		((frame_height + 127) / 128) * 2816),
> > +		DMA_ALIGNMENT);
> > +	size *= comv_bufcount;
> 
> 
> I'm sure this calculation is right and produces the correct value in all 
> instances - probably anyway but also does it ?
> 
> It is not obvious looking at this code that it is obviously correct.
> 
> I have a similar comment for alot of these Iris patches - we end up with 
> highly complex calculations using magic numbers which my guess would be 
> even people immersed in the firmware/driver/silicon development have a 
> hard time looking at and "just knowing" the code is correct.
> 
> Please reduce these calculations down to some kind of define that - for 
> example an intelligent programmer - an oxymoron of a term I accept - 
> could read the code and actually understand what is going on.
> 
> That programmer might even be yourself. You should be able to come along 
> in two, five, eight years time, look at a code snippet and pretty much 
> understand what it is doing and why without having to have a deep 
> epiphany when doing it.
> 
> These complex clauses stuffed with magic numbers and sometimes bitshfts 
> with a few alignments thrown in for good measure are inscrutable.

I agree with this, when the driver is not from the hardware maker, this can be
justified, but since you have full access to the documentation and probably can
ask the designers, it would be nicer to replace 64, 128, 512 and 2816 by named
macro or const. Its not a blame, many drivers are like this already.

Nicolas

> 
> > +	return size;
> > +}
> > +
> >   static u32 size_h264d_bse_cmd_buf(u32 frame_height)
> >   {
> >   	u32 height = ALIGN(frame_height, 32);
> > @@ -174,6 +221,20 @@ static u32 hfi_buffer_persist_h264d(void)
> >   		    DMA_ALIGNMENT);
> >   }
> > 
> > +static u32 hfi_buffer_persist_av1d(u32 max_width, u32 max_height, u32 total_ref_count)
> > +{
> > +	u32 comv_size, size;
> > +
> > +	comv_size =  hfi_buffer_comv_av1d(max_width, max_height, total_ref_count);
> > +	size = ALIGN((SIZE_AV1D_SEQUENCE_HEADER * 2 + SIZE_AV1D_METADATA +
> > +	AV1D_NUM_HW_PIC_BUF * (SIZE_AV1D_TILE_OFFSET + SIZE_AV1D_QM) +
> > +	AV1D_NUM_FRAME_HEADERS * (SIZE_AV1D_FRAME_HEADER +
> > +	2 * SIZE_AV1D_PROB_TABLE) + comv_size + HDR10_HIST_EXTRADATA_SIZE +
> > +	SIZE_AV1D_METADATA * AV1D_NUM_HW_PIC_BUF), DMA_ALIGNMENT);
> > +
> > +	return ALIGN(size, DMA_ALIGNMENT);
> > +}
> > +
> >   static u32 hfi_buffer_non_comv_h264d(u32 frame_width, u32 frame_height, u32 num_vpp_pipes)
> >   {
> >   	u32 size_bse = size_h264d_bse_cmd_buf(frame_height);
> > @@ -459,6 +520,148 @@ static u32 hfi_buffer_line_h264d(u32 frame_width, u32 frame_height,
> >   	return ALIGN((size + vpss_lb_size), DMA_ALIGNMENT);
> >   }
> > 
> > +static u32 size_av1d_lb_opb_wr1_nv12_ubwc(u32 frame_width, u32 frame_height)
> > +{
> > +	u32 y_width, y_width_a = 128;
> > +
> > +	y_width = ALIGN(frame_width, y_width_a);
> > +
> > +	return (256 * ((y_width + 31) / 32 + (AV1D_MAX_TILE_COLS - 1)));
> > +}
> > +
> > +static u32 size_av1d_lb_opb_wr1_tp10_ubwc(u32 frame_width, u32 frame_height)
> > +{
> > +	u32 y_width, y_width_a = 256;
> > +
> > +	y_width = ALIGN(frame_width, 192);
> > +	y_width = ALIGN(y_width * 4 / 3, y_width_a);
> > +
> > +	return (256 * ((y_width + 47) / 48 + (AV1D_MAX_TILE_COLS - 1)));
> 
> y_width is a thing times 4 divided by 3 aligned to 192.
> 
> OK
> 
> Then we return 256 * ((y_width + 47?) / 48 + (A_DEFINE_NICE - 1)));
> 
> 47 ? The magic number in the routine above is 31.
> 
> I don't think I'd be comfortable giving an RB for this. You guys need to 
> take steps to make your code more digestable - zapping the complex 
> bit-shifts and magic numbers.
> 
> I don't see how a reviewer can really be expected to fit this into their 
> head and say "yep LGTM" needs to be decoded both for the sake of the 
> reviewer and for future coders, perhaps even future you trying to figure 
> out where the bug is..
> 
> ---
> bod

Download attachment "signature.asc" of type "application/pgp-signature" (229 bytes)