linux-kernel - Re: [PATCH 5/5] media: iris: Add internal buffer calculation for AV1 decoder

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <970f0c9a-a8ef-4899-baa3-83f92d261ba8@oss.qualcomm.com>
Date: Thu, 2 Oct 2025 12:37:13 -0700
From: Deepa Guthyappa Madivalara <deepa.madivalara@....qualcomm.com>
To: Nicolas Dufresne <nicolas@...fresne.ca>,
        Bryan O'Donoghue
 <bod@...nel.org>,
        Mauro Carvalho Chehab <mchehab@...nel.org>,
        Vikash Garodia <vikash.garodia@....qualcomm.com>,
        Dikshita Agarwal <dikshita.agarwal@....qualcomm.com>,
        Abhinav Kumar <abhinav.kumar@...ux.dev>
Cc: linux-media@...r.kernel.org, linux-kernel@...r.kernel.org,
        linux-arm-msm@...r.kernel.org
Subject: Re: [PATCH 5/5] media: iris: Add internal buffer calculation for AV1
 decoder


On 10/2/2025 5:38 AM, Nicolas Dufresne wrote:
> Hi,
>
> Le jeudi 02 octobre 2025 à 00:30 +0100, Bryan O'Donoghue a écrit :
>> On 01/10/2025 20:00, Deepa Guthyappa Madivalara wrote:
>>> Implement internal buffer count and size calculations for AV1 decoder.
>>>
>>> Signed-off-by: Deepa Guthyappa Madivalara <deepa.madivalara@....qualcomm.com>
>>> ---
>>>    drivers/media/platform/qcom/iris/iris_buffer.h     |   1 +
>>>    .../platform/qcom/iris/iris_hfi_gen2_command.c     |   1 -
>>>    drivers/media/platform/qcom/iris/iris_vpu_buffer.c | 255 ++++++++++++++++++++-
>>>    drivers/media/platform/qcom/iris/iris_vpu_buffer.h | 105 +++++++++
>>>    4 files changed, 357 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/drivers/media/platform/qcom/iris/iris_buffer.h b/drivers/media/platform/qcom/iris/iris_buffer.h
>>> index 5ef365d9236c7cbdee24a4614789b3191881968b..75bb767761824c4c02e0df9b765896cc093be333 100644
>>> --- a/drivers/media/platform/qcom/iris/iris_buffer.h
>>> +++ b/drivers/media/platform/qcom/iris/iris_buffer.h
>>> @@ -27,6 +27,7 @@ struct iris_inst;
>>>     * @BUF_SCRATCH_1: buffer to store decoding/encoding context data for HW
>>>     * @BUF_SCRATCH_2: buffer to store encoding context data for HW
>>>     * @BUF_VPSS: buffer to store VPSS context data for HW
>>> + * @BUF_PARTIAL: buffer for AV1 IBC data
>>>     * @BUF_TYPE_MAX: max buffer types
>>>     */
>>>    enum iris_buffer_type {
>>> diff --git a/drivers/media/platform/qcom/iris/iris_hfi_gen2_command.c b/drivers/media/platform/qcom/iris/iris_hfi_gen2_command.c
>>> index e3a8b031b3f191a6d18e1084db34804a8172439c..000bf75ba74ace5e10585910cda02975b0c34304 100644
>>> --- a/drivers/media/platform/qcom/iris/iris_hfi_gen2_command.c
>>> +++ b/drivers/media/platform/qcom/iris/iris_hfi_gen2_command.c
>>> @@ -488,7 +488,6 @@ static int iris_hfi_gen2_set_linear_stride_scanline(struct iris_inst *inst, u32
>>>
>>>    static int iris_hfi_gen2_set_tier(struct iris_inst *inst, u32 plane)
>>>    {
>>> -	struct iris_inst_hfi_gen2 *inst_hfi_gen2 = to_iris_inst_hfi_gen2(inst);
>>>    	u32 port = iris_hfi_gen2_get_port(inst, V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE);
>>>    	u32 tier = inst->fw_caps[TIER].value;
>>>
>>> diff --git a/drivers/media/platform/qcom/iris/iris_vpu_buffer.c b/drivers/media/platform/qcom/iris/iris_vpu_buffer.c
>>> index 4463be05ce165adef6b152eb0c155d2e6a7b3c36..17d3a7ae79e994257d596906cb4c17250a11a0cb 100644
>>> --- a/drivers/media/platform/qcom/iris/iris_vpu_buffer.c
>>> +++ b/drivers/media/platform/qcom/iris/iris_vpu_buffer.c
>>> @@ -9,6 +9,14 @@
>>>    #include "iris_hfi_gen2_defines.h"
>>>
>>>    #define HFI_MAX_COL_FRAME 6
>>> +#define HFI_COLOR_FORMAT_YUV420_NV12_UBWC_Y_TILE_HEIGHT (8)
>>> +#define HFI_COLOR_FORMAT_YUV420_NV12_UBWC_Y_TILE_WIDTH (32)
>>> +#define HFI_COLOR_FORMAT_YUV420_NV12_UBWC_UV_TILE_HEIGHT (8)
>>> +#define HFI_COLOR_FORMAT_YUV420_NV12_UBWC_UV_TILE_WIDTH (16)
>>> +#define HFI_COLOR_FORMAT_YUV420_TP10_UBWC_Y_TILE_HEIGHT (4)
>>> +#define HFI_COLOR_FORMAT_YUV420_TP10_UBWC_Y_TILE_WIDTH (48)
>>> +#define HFI_COLOR_FORMAT_YUV420_TP10_UBWC_UV_TILE_HEIGHT (4)
>>> +#define HFI_COLOR_FORMAT_YUV420_TP10_UBWC_UV_TILE_WIDTH (24)
>>>
>>>    #ifndef SYSTEM_LAL_TILE10
>>>    #define SYSTEM_LAL_TILE10 192
>>> @@ -39,6 +47,31 @@ static u32 hfi_buffer_bin_h264d(u32 frame_width, u32 frame_height, u32 num_vpp_p
>>>    	return size_h264d_hw_bin_buffer(n_aligned_w, n_aligned_h, num_vpp_pipes);
>>>    }
>>>
>>> +static u32 size_av1d_hw_bin_buffer(u32 frame_width, u32 frame_height, u32 num_vpp_pipes)
>>> +{
>>> +	u32 size_yuv, size_bin_hdr, size_bin_res;
>>> +
>>> +	size_yuv = ((frame_width * frame_height) <= BIN_BUFFER_THRESHOLD) ?
>>> +		((BIN_BUFFER_THRESHOLD * 3) >> 1) :
>>> +		((frame_width * frame_height * 3) >> 1);
>>> +	size_bin_hdr = size_yuv * AV1_CABAC_HDR_RATIO_HD_TOT;
>>> +	size_bin_res = size_yuv * AV1_CABAC_RES_RATIO_HD_TOT;
>>> +	size_bin_hdr = ALIGN(size_bin_hdr / num_vpp_pipes,
>>> +			     DMA_ALIGNMENT) * num_vpp_pipes;
>>> +	size_bin_res = ALIGN(size_bin_res / num_vpp_pipes,
>>> +			     DMA_ALIGNMENT) * num_vpp_pipes;
>>> +
>>> +	return size_bin_hdr + size_bin_res;
>>> +}
>>> +
>>> +static u32 hfi_buffer_bin_av1d(u32 frame_width, u32 frame_height, u32 num_vpp_pipes)
>>> +{
>>> +	u32 n_aligned_h = ALIGN(frame_height, 16);
>>> +	u32 n_aligned_w = ALIGN(frame_width, 16);
>>> +
>>> +	return size_av1d_hw_bin_buffer(n_aligned_w, n_aligned_h, num_vpp_pipes);
>>> +}
>>> +
>>>    static u32 size_h265d_hw_bin_buffer(u32 frame_width, u32 frame_height, u32 num_vpp_pipes)
>>>    {
>>>    	u32 product = frame_width * frame_height;
>>> @@ -110,6 +143,20 @@ static u32 hfi_buffer_comv_h265d(u32 frame_width, u32 frame_height, u32 _comv_bu
>>>    	return (_size * (_comv_bufcount)) + 512;
>>>    }
>> What's this alignment stuffed onto the end about ?
>>
>> Please guys give these magic numbers meaningful names.
> That would be nice, then I'll be able to document that for Hantro AV1 decoder
> too. It was assumed when we picked the driver that the table was hardware
> specific, but if its not, a drivers/media/v4l2-core/v4l2-av1.c is welcome.
>
>>> drivers/media/platform/verisilicon/hantro_hw.h:555
> static inline size_t
> hantro_av1_mv_size(unsigned int width, unsigned int height)
> {
> 	size_t num_sbs = hantro_av1_num_sbs(width) * hantro_av1_num_sbs(height);
>
> 	return ALIGN(num_sbs * 384, 16) * 2 + 512;
> }
> +static u32 hfi_buffer_comv_av1d(u32 frame_width, u32 frame_height, u32 comv_bufcount)
> +{
> +	u32 size;
> +
> +	size =  2 * ALIGN(MAX(((frame_width + 63) / 64) *
> +		((frame_height + 63) / 64) * 512,
> This looks like div_round_up()  ?
>
>>> +		((frame_width + 127) / 128) *
>>> +		((frame_height + 127) / 128) * 2816),
>>> +		DMA_ALIGNMENT);
>>> +	size *= comv_bufcount;
>>
>> I'm sure this calculation is right and produces the correct value in all
>> instances - probably anyway but also does it ?
>>
>> It is not obvious looking at this code that it is obviously correct.
>>
>> I have a similar comment for alot of these Iris patches - we end up with
>> highly complex calculations using magic numbers which my guess would be
>> even people immersed in the firmware/driver/silicon development have a
>> hard time looking at and "just knowing" the code is correct.
>>
>> Please reduce these calculations down to some kind of define that - for
>> example an intelligent programmer - an oxymoron of a term I accept -
>> could read the code and actually understand what is going on.
>>
>> That programmer might even be yourself. You should be able to come along
>> in two, five, eight years time, look at a code snippet and pretty much
>> understand what it is doing and why without having to have a deep
>> epiphany when doing it.
>>
>> These complex clauses stuffed with magic numbers and sometimes bitshfts
>> with a few alignments thrown in for good measure are inscrutable.
> I agree with this, when the driver is not from the hardware maker, this can be
> justified, but since you have full access to the documentation and probably can
> ask the designers, it would be nicer to replace 64, 128, 512 and 2816 by named
> macro or const. Its not a blame, many drivers are like this already.
>
> Nicolas

Thank you for the feedback. I am working on making this as much clearer
as I can.

>>> +	return size;
>>> +}
>>> +
>>>    static u32 size_h264d_bse_cmd_buf(u32 frame_height)
>>>    {
>>>    	u32 height = ALIGN(frame_height, 32);
>>> @@ -174,6 +221,20 @@ static u32 hfi_buffer_persist_h264d(void)
>>>    		    DMA_ALIGNMENT);
>>>    }
>>>
>>> +static u32 hfi_buffer_persist_av1d(u32 max_width, u32 max_height, u32 total_ref_count)
>>> +{
>>> +	u32 comv_size, size;
>>> +
>>> +	comv_size =  hfi_buffer_comv_av1d(max_width, max_height, total_ref_count);
>>> +	size = ALIGN((SIZE_AV1D_SEQUENCE_HEADER * 2 + SIZE_AV1D_METADATA +
>>> +	AV1D_NUM_HW_PIC_BUF * (SIZE_AV1D_TILE_OFFSET + SIZE_AV1D_QM) +
>>> +	AV1D_NUM_FRAME_HEADERS * (SIZE_AV1D_FRAME_HEADER +
>>> +	2 * SIZE_AV1D_PROB_TABLE) + comv_size + HDR10_HIST_EXTRADATA_SIZE +
>>> +	SIZE_AV1D_METADATA * AV1D_NUM_HW_PIC_BUF), DMA_ALIGNMENT);
>>> +
>>> +	return ALIGN(size, DMA_ALIGNMENT);
>>> +}
>>> +
>>>    static u32 hfi_buffer_non_comv_h264d(u32 frame_width, u32 frame_height, u32 num_vpp_pipes)
>>>    {
>>>    	u32 size_bse = size_h264d_bse_cmd_buf(frame_height);
>>> @@ -459,6 +520,148 @@ static u32 hfi_buffer_line_h264d(u32 frame_width, u32 frame_height,
>>>    	return ALIGN((size + vpss_lb_size), DMA_ALIGNMENT);
>>>    }
>>>
>>> +static u32 size_av1d_lb_opb_wr1_nv12_ubwc(u32 frame_width, u32 frame_height)
>>> +{
>>> +	u32 y_width, y_width_a = 128;
>>> +
>>> +	y_width = ALIGN(frame_width, y_width_a);
>>> +
>>> +	return (256 * ((y_width + 31) / 32 + (AV1D_MAX_TILE_COLS - 1)));
>>> +}
>>> +
>>> +static u32 size_av1d_lb_opb_wr1_tp10_ubwc(u32 frame_width, u32 frame_height)
>>> +{
>>> +	u32 y_width, y_width_a = 256;
>>> +
>>> +	y_width = ALIGN(frame_width, 192);
>>> +	y_width = ALIGN(y_width * 4 / 3, y_width_a);
>>> +
>>> +	return (256 * ((y_width + 47) / 48 + (AV1D_MAX_TILE_COLS - 1)));
>> y_width is a thing times 4 divided by 3 aligned to 192.
>>
>> OK
>>
>> Then we return 256 * ((y_width + 47?) / 48 + (A_DEFINE_NICE - 1)));
>>
>> 47 ? The magic number in the routine above is 31.
>>
>> I don't think I'd be comfortable giving an RB for this. You guys need to
>> take steps to make your code more digestable - zapping the complex
>> bit-shifts and magic numbers.
>>
>> I don't see how a reviewer can really be expected to fit this into their
>> head and say "yep LGTM" needs to be decoded both for the sake of the
>> reviewer and for future coders, perhaps even future you trying to figure
>> out where the bug is..
>>
>> ---
>> bod

Understood, thank you.  I'm continuing to work on simplifying the code to
make it more meaningful.

Regards,
Deepa