lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20250925-knp_video-v1-4-e323c0b3c0cd@oss.qualcomm.com>
Date: Thu, 25 Sep 2025 04:44:42 +0530
From: Vikash Garodia <vikash.garodia@....qualcomm.com>
To: Dikshita Agarwal <dikshita.agarwal@....qualcomm.com>,
        Abhinav Kumar <abhinav.kumar@...ux.dev>,
        Bryan O'Donoghue <bod@...nel.org>,
        Mauro Carvalho Chehab <mchehab@...nel.org>,
        Rob Herring <robh@...nel.org>,
        Krzysztof Kozlowski <krzk+dt@...nel.org>,
        Conor Dooley <conor+dt@...nel.org>,
        Philipp Zabel <p.zabel@...gutronix.de>
Cc: linux-arm-msm@...r.kernel.org, linux-media@...r.kernel.org,
        devicetree@...r.kernel.org, linux-kernel@...r.kernel.org,
        Vishnu Reddy <quic_bvisredd@...cinc.com>,
        Vikash Garodia <vikash.garodia@....qualcomm.com>
Subject: [PATCH 4/8] media: iris: Introduce buffer size calculations for
 vpu4

Introduces vp4 buffer size calculation for both encoder and decoder.
Reuse the buffer size calculation which are common, while adding the
vpu4 ones separately.

Co-developed-by: Vishnu Reddy <quic_bvisredd@...cinc.com>
Signed-off-by: Vishnu Reddy <quic_bvisredd@...cinc.com>
Signed-off-by: Vikash Garodia <vikash.garodia@....qualcomm.com>
---
 drivers/media/platform/qcom/iris/iris_vpu_buffer.c | 289 +++++++++++++++++++++
 drivers/media/platform/qcom/iris/iris_vpu_buffer.h |   5 +-
 2 files changed, 293 insertions(+), 1 deletion(-)

diff --git a/drivers/media/platform/qcom/iris/iris_vpu_buffer.c b/drivers/media/platform/qcom/iris/iris_vpu_buffer.c
index 4463be05ce165adef6b152eb0c155d2e6a7b3c36..a08925e941b34d6df86b19ca52691327c020c811 100644
--- a/drivers/media/platform/qcom/iris/iris_vpu_buffer.c
+++ b/drivers/media/platform/qcom/iris/iris_vpu_buffer.c
@@ -1408,6 +1408,251 @@ static u32 iris_vpu_enc_vpss_size(struct iris_inst *inst)
 	return hfi_buffer_vpss_enc(width, height, ds_enable, 0, 0);
 }
 
+static u32 hfi_vpu4x_vp9d_lb_size(u32 frame_width, u32 frame_height, u32 num_vpp_pipes)
+{
+	u32 max_value = max(frame_width, frame_height);
+	u32 size_vp9d_qp = DIV_ROUND_UP(frame_width, 64) * DIV_ROUND_UP(frame_height, 64) * 128;
+	u32 size_dpb_obp = (ALIGN(max_value, 64) * 192) + (256 * 6);
+	u32 size_vp9d_fe_left_lb = ALIGN(max_value, 64) * 492;
+	u32 size_vp9d_top_lb = (ALIGN(max_value, 64) * 190) + 256;
+	u32 size_vp9d_se_left_lb = ALIGN(max_value, 64);
+
+	return size_vp9d_qp + (size_dpb_obp * num_vpp_pipes) + size_vp9d_fe_left_lb +
+			size_vp9d_top_lb + (size_vp9d_se_left_lb * num_vpp_pipes);
+}
+
+static u32 hfi_vpu4x_buffer_line_vp9d(u32 frame_width, u32 frame_height, u32 _yuv_bufcount_min,
+				      bool is_opb, u32 num_vpp_pipes)
+{
+	u32 lb_size = hfi_vpu4x_vp9d_lb_size(frame_width, frame_height, num_vpp_pipes);
+	u32 dpb_obp_size = 0;
+
+	if (is_opb)
+		dpb_obp_size = ((ALIGN(max(frame_width, frame_height), 64) * 192) + (256 * 6)) *
+				num_vpp_pipes;
+
+	return lb_size + dpb_obp_size;
+}
+
+static u32 iris_vpu4x_dec_line_size(struct iris_inst *inst)
+{
+	u32 num_vpp_pipes = inst->core->iris_platform_data->num_vpp_pipe;
+	u32 out_min_count = inst->buffers[BUF_OUTPUT].min_count;
+	struct v4l2_format *f = inst->fmt_src;
+	u32 height = f->fmt.pix_mp.height;
+	u32 width = f->fmt.pix_mp.width;
+	bool is_opb = false;
+
+	if (iris_split_mode_enabled(inst))
+		is_opb = true;
+
+	if (inst->codec == V4L2_PIX_FMT_H264)
+		return hfi_buffer_line_h264d(width, height, is_opb, num_vpp_pipes);
+	else if (inst->codec == V4L2_PIX_FMT_HEVC)
+		return hfi_buffer_line_h265d(width, height, is_opb, num_vpp_pipes);
+	else if (inst->codec == V4L2_PIX_FMT_VP9)
+		return hfi_vpu4x_buffer_line_vp9d(width, height, out_min_count, is_opb,
+						  num_vpp_pipes);
+
+	return 0;
+}
+
+static u32 hfi_buffer4x_persist_h265d(u32 rpu_enabled)
+{
+	return ALIGN((SIZE_SLIST_BUF_H265 * NUM_SLIST_BUF_H265 + H265_NUM_FRM_INFO *
+		H265_DISPLAY_BUF_SIZE + (H265_NUM_TILE * sizeof(u32)) + (NUM_HW_PIC_BUF *
+		(SIZE_SEI_USERDATA + SIZE_H265D_ARP + SIZE_THREE_DIMENSION_USERDATA)) +
+		rpu_enabled * NUM_HW_PIC_BUF * SIZE_DOLBY_RPU_METADATA), DMA_ALIGNMENT);
+}
+
+static u32 iris_vpu4x_dec_persist_size(struct iris_inst *inst)
+{
+	if (inst->codec == V4L2_PIX_FMT_H264)
+		return hfi_buffer_persist_h264d();
+	else if (inst->codec == V4L2_PIX_FMT_HEVC)
+		return hfi_buffer4x_persist_h265d(0);
+	else if (inst->codec == V4L2_PIX_FMT_VP9)
+		return hfi_buffer_persist_vp9d();
+
+	return 0;
+}
+
+static u32 size_se_lb(u32 standard, u32 num_vpp_pipes_enc,
+		      u32 frame_width_coded, u32 frame_height_coded)
+{
+	u32 se_tlb_size = ALIGN(frame_width_coded, DMA_ALIGNMENT);
+	u32 se_llb_size = (standard == HFI_CODEC_ENCODE_HEVC) ?
+			   ((frame_height_coded + 32 - 1) / 32) * 4 * 16 :
+			   ((frame_height_coded + 16 - 1) / 16) * 5 * 16;
+
+	se_llb_size = ALIGN(se_llb_size, 32);
+
+	if (num_vpp_pipes_enc > 1)
+		se_llb_size = ALIGN(se_llb_size + 512, DMA_ALIGNMENT) * num_vpp_pipes_enc;
+
+	return ALIGN(se_tlb_size + se_llb_size, DMA_ALIGNMENT);
+}
+
+static u32 size_te_lb(bool is_ten_bit, u32 num_vpp_pipes_enc, u32 width_in_lcus,
+		      u32 frame_height_coded, u32 frame_width_coded)
+{
+	u32 te_llb_col_rc_size = ALIGN(32 * width_in_lcus / num_vpp_pipes_enc,
+				       DMA_ALIGNMENT) * num_vpp_pipes_enc;
+	u32 te_tlb_recon_data_size = ALIGN((is_ten_bit ? 3 : 2) * frame_width_coded,
+					   DMA_ALIGNMENT);
+	u32 te_llb_recon_data_size = ((1 + is_ten_bit) * 3 * frame_height_coded +
+				      num_vpp_pipes_enc - 1) / num_vpp_pipes_enc;
+	te_llb_recon_data_size = ALIGN(te_llb_recon_data_size, DMA_ALIGNMENT) * num_vpp_pipes_enc;
+
+	return ALIGN(te_llb_recon_data_size + te_llb_col_rc_size + te_tlb_recon_data_size,
+		     DMA_ALIGNMENT);
+}
+
+static u32 size_fe_lb(bool is_ten_bit, u32 standard, u32 num_vpp_pipes_enc,
+		      u32 frame_height_coded, u32 frame_width_coded)
+{
+	u32 log2_lcu_size, num_cu_in_height_pipe, num_cu_in_width,
+		fb_llb_db_ctrl_size, fb_llb_db_luma_size, fb_llb_db_chroma_size,
+		fb_tlb_db_ctrl_size, fb_tlb_db_luma_size, fb_tlb_db_chroma_size,
+		fb_llb_sao_ctrl_size, fb_llb_sao_luma_size,
+		fb_llb_sao_chroma_size, fb_tlb_sao_ctrl_size,
+		fb_tlb_sao_luma_size, fb_tlb_sao_chroma_size,
+		fb_lb_top_sdc_size, fb_lb_se_ctrl_size, fe_tlb_size,
+		size_per_lcu;
+
+	log2_lcu_size = (standard == HFI_CODEC_ENCODE_HEVC) ? 5 : 4;
+	num_cu_in_height_pipe = ((frame_height_coded >> log2_lcu_size) + num_vpp_pipes_enc - 1) /
+				 num_vpp_pipes_enc;
+	num_cu_in_width = frame_width_coded >> log2_lcu_size;
+
+	size_per_lcu = 2;
+	fe_tlb_size = is_ten_bit ? (128 * (size_per_lcu + 1)) : (size_per_lcu * 64);
+	fb_llb_db_ctrl_size = ALIGN(fe_tlb_size, DMA_ALIGNMENT) * num_cu_in_height_pipe;
+	fb_llb_db_ctrl_size = ALIGN(fb_llb_db_ctrl_size, DMA_ALIGNMENT) * num_vpp_pipes_enc;
+
+	size_per_lcu = (1 << (log2_lcu_size - 3));
+	fe_tlb_size = is_ten_bit ? (128 * (size_per_lcu + 1)) : (size_per_lcu * 64);
+	fb_llb_db_luma_size = ALIGN(fe_tlb_size, DMA_ALIGNMENT) * num_cu_in_height_pipe;
+	fb_llb_db_luma_size = ALIGN(fb_llb_db_luma_size, DMA_ALIGNMENT) * num_vpp_pipes_enc;
+
+	size_per_lcu = ((1 << (log2_lcu_size - 4)) * 2);
+	fe_tlb_size = is_ten_bit ? (128 * (size_per_lcu + 1)) : (size_per_lcu * 64);
+	fb_llb_db_chroma_size = ALIGN(fe_tlb_size, DMA_ALIGNMENT) * num_cu_in_height_pipe;
+	fb_llb_db_chroma_size = ALIGN(fb_llb_db_chroma_size, DMA_ALIGNMENT) * num_vpp_pipes_enc;
+
+	size_per_lcu = 1;
+	fe_tlb_size = 1 ? (128 * (size_per_lcu + 1)) : (size_per_lcu * 64);
+	fb_tlb_db_ctrl_size = ALIGN(fe_tlb_size, DMA_ALIGNMENT) * num_cu_in_width;
+
+	size_per_lcu = ((1 << (log2_lcu_size - 3)) + 1);
+	fe_tlb_size = is_ten_bit ? (128 * (size_per_lcu + 1)) : (size_per_lcu * 64);
+	fb_tlb_db_luma_size = ALIGN(fe_tlb_size, DMA_ALIGNMENT) * num_cu_in_width;
+
+	size_per_lcu = (2 * ((1 << (log2_lcu_size - 4)) + 1));
+	fe_tlb_size = is_ten_bit ? (128 * (size_per_lcu + 1)) : (size_per_lcu * 64);
+	fb_tlb_db_chroma_size = ALIGN(fe_tlb_size, DMA_ALIGNMENT) * num_cu_in_width;
+
+	size_per_lcu = 1;
+	fe_tlb_size = 1 ? (128 * (size_per_lcu + 1)) : (size_per_lcu * 64);
+	fb_llb_sao_ctrl_size = ALIGN(fe_tlb_size, DMA_ALIGNMENT) * num_cu_in_height_pipe;
+	fb_llb_sao_ctrl_size = fb_llb_sao_ctrl_size * num_vpp_pipes_enc;
+
+	fb_llb_sao_luma_size = 256 * num_vpp_pipes_enc;
+	fb_llb_sao_chroma_size = 256 * num_vpp_pipes_enc;
+
+	size_per_lcu = 1;
+	fe_tlb_size = 1 ? (128 * (size_per_lcu + 1)) : (size_per_lcu * 64);
+	fb_tlb_sao_ctrl_size = ALIGN(fe_tlb_size, DMA_ALIGNMENT) * num_cu_in_width;
+
+	fb_tlb_sao_luma_size = 256;
+	fb_tlb_sao_chroma_size = 256;
+	fb_lb_top_sdc_size = ALIGN((16 * (frame_width_coded >> 5)), DMA_ALIGNMENT);
+
+	fb_lb_se_ctrl_size = ALIGN((2020 * (frame_width_coded >> 5)), DMA_ALIGNMENT);
+
+	return fb_llb_db_ctrl_size + fb_llb_db_luma_size + fb_llb_db_chroma_size +
+		fb_tlb_db_ctrl_size + fb_tlb_db_luma_size + fb_tlb_db_chroma_size +
+		fb_llb_sao_ctrl_size + fb_llb_sao_luma_size + fb_llb_sao_chroma_size +
+		fb_tlb_sao_ctrl_size + fb_tlb_sao_luma_size + fb_tlb_sao_chroma_size +
+		fb_lb_top_sdc_size + fb_lb_se_ctrl_size;
+}
+
+static u32 size_md_lb(u32 standard, u32 frame_width_coded,
+		      u32 frame_height_coded, u32 num_vpp_pipes_enc)
+{
+	u32 md_tlb_size = ALIGN(frame_width_coded, DMA_ALIGNMENT);
+	u32 md_llb_size = (standard == HFI_CODEC_ENCODE_HEVC) ?
+			   ((frame_height_coded + 32 - 1) / 32) * 4 * 16 :
+			   ((frame_height_coded + 16 - 1) / 16) * 5 * 16;
+
+	md_llb_size = ALIGN(md_llb_size, 32);
+
+	if (num_vpp_pipes_enc > 1)
+		md_llb_size = ALIGN(md_llb_size + 512, DMA_ALIGNMENT) * num_vpp_pipes_enc;
+
+	md_llb_size = ALIGN(md_llb_size, DMA_ALIGNMENT);
+
+	return ALIGN(md_tlb_size + md_llb_size, DMA_ALIGNMENT);
+}
+
+static u32 size_dma_opb_lb(u32 num_vpp_pipes_enc, u32 frame_width_coded,
+			   u32 frame_height_coded)
+{
+	u32 dma_opb_wr_tlb_y_size = ((frame_width_coded + 15) >> 4) << 7;
+	u32 dma_opb_wr_tlb_uv_size = ((frame_width_coded + 15) >> 4) << 7;
+	u32 dma_opb_wr2_tlb_y_size = ALIGN((2 * 6 * 64 * frame_height_coded / 8), DMA_ALIGNMENT) *
+					   num_vpp_pipes_enc;
+	u32 dma_opb_wr2_tlb_uv_size = ALIGN((2 * 6 * 64 * frame_height_coded / 8), DMA_ALIGNMENT) *
+					    num_vpp_pipes_enc;
+
+	dma_opb_wr2_tlb_y_size = max(dma_opb_wr2_tlb_y_size, dma_opb_wr_tlb_y_size << 1);
+	dma_opb_wr2_tlb_uv_size = max(dma_opb_wr2_tlb_uv_size, dma_opb_wr_tlb_uv_size << 1);
+
+	return ALIGN(dma_opb_wr_tlb_y_size + dma_opb_wr_tlb_uv_size + dma_opb_wr2_tlb_y_size +
+		     dma_opb_wr2_tlb_uv_size, DMA_ALIGNMENT);
+}
+
+static u32 hfi_vpu4x_buffer_line_enc(u32 frame_width, u32 frame_height,
+				     bool is_ten_bit, u32 num_vpp_pipes_enc,
+				     u32 lcu_size, u32 standard)
+{
+	u32 width_in_lcus = (frame_width + lcu_size - 1) / lcu_size;
+	u32 height_in_lcus = (frame_height + lcu_size - 1) / lcu_size;
+	u32 frame_width_coded = width_in_lcus * lcu_size;
+	u32 frame_height_coded = height_in_lcus * lcu_size;
+
+	u32 se_lb_size = size_se_lb(standard, num_vpp_pipes_enc, frame_width_coded,
+				    frame_height_coded);
+	u32 te_lb_size = size_te_lb(is_ten_bit, num_vpp_pipes_enc, width_in_lcus,
+				    frame_height_coded, frame_width_coded);
+	u32 fe_lb_size = size_fe_lb(is_ten_bit, standard, num_vpp_pipes_enc, frame_height_coded,
+				    frame_width_coded);
+	u32 md_lb_size = size_md_lb(standard, frame_width_coded, frame_height_coded,
+				    num_vpp_pipes_enc);
+	u32 dma_opb_lb_size = size_dma_opb_lb(num_vpp_pipes_enc, frame_width_coded,
+					      frame_height_coded);
+	u32 dse_lb_size = ALIGN((256 + (16 * (frame_width_coded >> 4))), DMA_ALIGNMENT);
+	u32 size_vpss_lb_enc = size_vpss_line_buf_vpu33(num_vpp_pipes_enc, frame_width_coded,
+							frame_height_coded);
+	u32 size = se_lb_size + te_lb_size + fe_lb_size + md_lb_size + dma_opb_lb_size +
+				dse_lb_size + size_vpss_lb_enc;
+	size = size << 1;
+
+	return size;
+}
+
+static u32 iris_vpu4x_enc_line_size(struct iris_inst *inst)
+{
+	u32 num_vpp_pipes = inst->core->iris_platform_data->num_vpp_pipe;
+	u32 lcu_size = inst->codec == V4L2_PIX_FMT_HEVC ? 32 : 16;
+	struct v4l2_format *f = inst->fmt_dst;
+	u32 height = f->fmt.pix_mp.height;
+	u32 width = f->fmt.pix_mp.width;
+
+	return hfi_vpu4x_buffer_line_enc(width, height, 0, num_vpp_pipes,
+					 lcu_size, inst->codec);
+}
+
 static int output_min_count(struct iris_inst *inst)
 {
 	int output_min_count = 4;
@@ -1503,6 +1748,50 @@ u32 iris_vpu33_buf_size(struct iris_inst *inst, enum iris_buffer_type buffer_typ
 	return size;
 }
 
+u32 iris_vpu4x_buf_size(struct iris_inst *inst, enum iris_buffer_type buffer_type)
+{
+	const struct iris_vpu_buf_type_handle *buf_type_handle_arr = NULL;
+	u32 size = 0, buf_type_handle_size = 0, i;
+
+	static const struct iris_vpu_buf_type_handle dec_internal_buf_type_handle[] = {
+		{BUF_BIN,         iris_vpu_dec_bin_size         },
+		{BUF_COMV,        iris_vpu_dec_comv_size        },
+		{BUF_NON_COMV,    iris_vpu_dec_non_comv_size    },
+		{BUF_LINE,        iris_vpu4x_dec_line_size      },
+		{BUF_PERSIST,     iris_vpu4x_dec_persist_size   },
+		{BUF_DPB,         iris_vpu_dec_dpb_size         },
+		{BUF_SCRATCH_1,   iris_vpu_dec_scratch1_size    },
+	};
+
+	static const struct iris_vpu_buf_type_handle enc_internal_buf_type_handle[] = {
+		{BUF_BIN,         iris_vpu_enc_bin_size         },
+		{BUF_COMV,        iris_vpu_enc_comv_size        },
+		{BUF_NON_COMV,    iris_vpu_enc_non_comv_size    },
+		{BUF_LINE,        iris_vpu4x_enc_line_size      },
+		{BUF_ARP,         iris_vpu_enc_arp_size         },
+		{BUF_VPSS,        iris_vpu_enc_vpss_size        },
+		{BUF_SCRATCH_1,   iris_vpu_enc_scratch1_size    },
+		{BUF_SCRATCH_2,   iris_vpu_enc_scratch2_size    },
+	};
+
+	if (inst->domain == DECODER) {
+		buf_type_handle_size = ARRAY_SIZE(dec_internal_buf_type_handle);
+		buf_type_handle_arr = dec_internal_buf_type_handle;
+	} else if (inst->domain == ENCODER) {
+		buf_type_handle_size = ARRAY_SIZE(enc_internal_buf_type_handle);
+		buf_type_handle_arr = enc_internal_buf_type_handle;
+	}
+
+	for (i = 0; i < buf_type_handle_size; i++) {
+		if (buf_type_handle_arr[i].type == buffer_type) {
+			size = buf_type_handle_arr[i].handle(inst);
+			break;
+		}
+	}
+
+	return size;
+}
+
 static u32 internal_buffer_count(struct iris_inst *inst,
 				 enum iris_buffer_type buffer_type)
 {
diff --git a/drivers/media/platform/qcom/iris/iris_vpu_buffer.h b/drivers/media/platform/qcom/iris/iris_vpu_buffer.h
index 04f0b7400a1e4e1d274d690a2761b9e57778e8b7..fb544e8b3bf6b9ce86920a18537fd0a2c21cdc31 100644
--- a/drivers/media/platform/qcom/iris/iris_vpu_buffer.h
+++ b/drivers/media/platform/qcom/iris/iris_vpu_buffer.h
@@ -46,7 +46,7 @@ struct iris_inst;
 #define VP9_NUM_FRAME_INFO_BUF 32
 #define VP9_NUM_PROBABILITY_TABLE_BUF (VP9_NUM_FRAME_INFO_BUF + 4)
 #define VP9_PROB_TABLE_SIZE (3840)
-#define VP9_FRAME_INFO_BUF_SIZE (6144)
+#define VP9_FRAME_INFO_BUF_SIZE (6400)
 #define BUFFER_ALIGNMENT_32_BYTES 32
 #define CCE_TILE_OFFSET_SIZE ALIGN(32 * 4 * 4, BUFFER_ALIGNMENT_32_BYTES)
 #define MAX_SUPERFRAME_HEADER_LEN (34)
@@ -66,6 +66,8 @@ struct iris_inst;
 #define H265_CABAC_HDR_RATIO_HD_TOT 2
 #define H265_CABAC_RES_RATIO_HD_TOT 2
 #define SIZE_H265D_VPP_CMD_PER_BUF (256)
+#define SIZE_THREE_DIMENSION_USERDATA	768
+#define SIZE_H265D_ARP			9728
 
 #define VPX_DECODER_FRAME_CONCURENCY_LVL (2)
 #define VPX_DECODER_FRAME_BIN_HDR_BUDGET 1
@@ -148,6 +150,7 @@ static inline u32 size_h264d_qp(u32 frame_width, u32 frame_height)
 
 u32 iris_vpu_buf_size(struct iris_inst *inst, enum iris_buffer_type buffer_type);
 u32 iris_vpu33_buf_size(struct iris_inst *inst, enum iris_buffer_type buffer_type);
+u32 iris_vpu4x_buf_size(struct iris_inst *inst, enum iris_buffer_type buffer_type);
 int iris_vpu_buf_count(struct iris_inst *inst, enum iris_buffer_type buffer_type);
 
 #endif

-- 
2.34.1


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ