lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20230123220848.tmvqxawr6vfs6b2v@SoMainline.org>
Date:   Mon, 23 Jan 2023 23:08:48 +0100
From:   Marijn Suijten <marijn.suijten@...ainline.org>
To:     Kuogee Hsieh <quic_khsieh@...cinc.com>
Cc:     dri-devel@...ts.freedesktop.org, robdclark@...il.com,
        sean@...rly.run, swboyd@...omium.org, dianders@...omium.org,
        vkoul@...nel.org, daniel@...ll.ch, airlied@...il.com,
        agross@...nel.org, dmitry.baryshkov@...aro.org,
        andersson@...nel.org, quic_abhinavk@...cinc.com,
        quic_sbillaka@...cinc.com, freedreno@...ts.freedesktop.org,
        linux-arm-msm@...r.kernel.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH v1 07/14] drm/msm/dp: add dsc helper functions

This has nothing to do with /dp, make it /dpu


On 2023-01-23 10:24:27, Kuogee Hsieh wrote:
> Add DSC related supporting functions to calculate DSC related parameters.
> In addition, DSC hardware encoder customized configuration parameters are
> also included. Algorithms used to perform calculation are derived from
> system engineer spreadsheet.
> 
> Signed-off-by: Kuogee Hsieh <quic_khsieh@...cinc.com>
> ---
>  drivers/gpu/drm/msm/Makefile                   |   1 +
>  drivers/gpu/drm/msm/disp/dpu1/dpu_dsc_helper.c | 537 +++++++++++++++++++++++++
>  drivers/gpu/drm/msm/disp/dpu1/dpu_dsc_helper.h |  25 ++
>  drivers/gpu/drm/msm/msm_drv.h                  |   4 +
>  4 files changed, 567 insertions(+)
>  create mode 100644 drivers/gpu/drm/msm/disp/dpu1/dpu_dsc_helper.c
>  create mode 100644 drivers/gpu/drm/msm/disp/dpu1/dpu_dsc_helper.h
> 
> diff --git a/drivers/gpu/drm/msm/Makefile b/drivers/gpu/drm/msm/Makefile
> index 7274c412..28cf52b 100644
> --- a/drivers/gpu/drm/msm/Makefile
> +++ b/drivers/gpu/drm/msm/Makefile
> @@ -65,6 +65,7 @@ msm-$(CONFIG_DRM_MSM_DPU) += \
>  	disp/dpu1/dpu_hw_catalog.o \
>  	disp/dpu1/dpu_hw_ctl.o \
>  	disp/dpu1/dpu_hw_dsc.o \
> +	disp/dpu1/dpu_dsc_helper.o \
>  	disp/dpu1/dpu_hw_interrupts.o \
>  	disp/dpu1/dpu_hw_intf.o \
>  	disp/dpu1/dpu_hw_lm.o \
> diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_dsc_helper.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_dsc_helper.c
> new file mode 100644
> index 00000000..48cef23
> --- /dev/null
> +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_dsc_helper.c
> @@ -0,0 +1,537 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Copyright (c) 2012-2023 The Linux Foundation. All rights reserved.
> + * Copyright (c) 2023. Qualcomm Innovation Center, Inc. All rights reserved
> + */
> +
> +#include "msm_drv.h"
> +#include "dpu_kms.h"
> +#include "dpu_hw_dsc.h"
> +#include "dpu_dsc_helper.h"
> +
> +
> +#define DPU_DSC_PPS_SIZE       128
> +
> +enum dpu_dsc_ratio_type {
> +	DSC_V11_8BPC_8BPP,
> +	DSC_V11_10BPC_8BPP,
> +	DSC_V11_10BPC_10BPP,
> +	DSC_V11_SCR1_8BPC_8BPP,
> +	DSC_V11_SCR1_10BPC_8BPP,
> +	DSC_V11_SCR1_10BPC_10BPP,
> +	DSC_V12_444_8BPC_8BPP = DSC_V11_SCR1_8BPC_8BPP,
> +	DSC_V12_444_10BPC_8BPP = DSC_V11_SCR1_10BPC_8BPP,
> +	DSC_V12_444_10BPC_10BPP = DSC_V11_SCR1_10BPC_10BPP,
> +	DSC_V12_422_8BPC_7BPP,
> +	DSC_V12_422_8BPC_8BPP,
> +	DSC_V12_422_10BPC_7BPP,
> +	DSC_V12_422_10BPC_10BPP,
> +	DSC_V12_420_8BPC_6BPP,
> +	DSC_V12_420_10BPC_6BPP,
> +	DSC_V12_420_10BPC_7_5BPP,
> +	DSC_RATIO_TYPE_MAX
> +};
> +
> +
> +static u16 dpu_dsc_rc_buf_thresh[DSC_NUM_BUF_RANGES - 1] = {
> +		0x0e, 0x1c, 0x2a, 0x38, 0x46, 0x54,
> +		0x62, 0x69, 0x70, 0x77, 0x79, 0x7b, 0x7d, 0x7e
> +};
> +
> +/*
> + * Rate control - Min QP values for each ratio type in dpu_dsc_ratio_type
> + */
> +static char dpu_dsc_rc_range_min_qp[DSC_RATIO_TYPE_MAX][DSC_NUM_BUF_RANGES] = {
> +	/* DSC v1.1 */
> +	{0, 0, 1, 1, 3, 3, 3, 3, 3, 3, 5, 5, 5, 7, 13},
> +	{0, 4, 5, 5, 7, 7, 7, 7, 7, 7, 9, 9, 9, 11, 17},
> +	{0, 4, 5, 6, 7, 7, 7, 7, 7, 7, 9, 9, 9, 11, 15},
> +	/* DSC v1.1 SCR and DSC v1.2 RGB 444 */
> +	{0, 0, 1, 1, 3, 3, 3, 3, 3, 3, 5, 5, 5, 9, 12},
> +	{0, 4, 5, 5, 7, 7, 7, 7, 7, 7, 9, 9, 9, 13, 16},
> +	{0, 4, 5, 6, 7, 7, 7, 7, 7, 7, 9, 9, 9, 11, 15},
> +	/* DSC v1.2 YUV422 */
> +	{0, 0, 1, 2, 3, 3, 3, 3, 3, 3, 5, 5, 5, 7, 11},
> +	{0, 0, 1, 2, 3, 3, 3, 3, 3, 3, 5, 5, 5, 7, 10},
> +	{0, 4, 5, 6, 7, 7, 7, 7, 7, 7, 9, 9, 9, 11, 15},
> +	{0, 2, 3, 4, 5, 5, 5, 6, 6, 7, 8, 8, 9, 11, 12},
> +	/* DSC v1.2 YUV420 */
> +	{0, 0, 1, 1, 3, 3, 3, 3, 3, 3, 5, 5, 5, 7, 10},
> +	{0, 2, 3, 4, 6, 7, 7, 7, 7, 7, 9, 9, 9, 11, 14},
> +	{0, 2, 3, 4, 5, 5, 5, 6, 6, 7, 8, 8, 9, 11, 12},
> +};
> +
> +/*
> + * Rate control - Max QP values for each ratio type in dpu_dsc_ratio_type
> + */
> +static char dpu_dsc_rc_range_max_qp[DSC_RATIO_TYPE_MAX][DSC_NUM_BUF_RANGES] = {
> +	/* DSC v1.1 */
> +	{4, 4, 5, 6, 7, 7, 7, 8, 9, 10, 11, 12, 13, 13, 15},
> +	{4, 8, 9, 10, 11, 11, 11, 12, 13, 14, 15, 16, 17, 17, 19},
> +	{7, 8, 9, 10, 11, 11, 11, 12, 13, 13, 14, 14, 15, 15, 16},
> +	/* DSC v1.1 SCR and DSC v1.2 RGB 444 */
> +	{4, 4, 5, 6, 7, 7, 7, 8, 9, 10, 10, 11, 11, 12, 13},
> +	{8, 8, 9, 10, 11, 11, 11, 12, 13, 14, 14, 15, 15, 16, 17},
> +	{7, 8, 9, 10, 11, 11, 11, 12, 13, 13, 14, 14, 15, 15, 16},
> +	/* DSC v1.2 YUV422 */
> +	{3, 4, 5, 6, 7, 7, 7, 8, 9, 9, 10, 10, 11, 11, 12},
> +	{2, 4, 5, 6, 7, 7, 7, 8, 8, 9, 9, 9, 9, 10, 11},
> +	{7, 8, 9, 10, 11, 11, 11, 12, 13, 13, 14, 14, 15, 15, 16},
> +	{2, 5, 5, 6, 6, 7, 7, 8, 9, 9, 10, 11, 11, 12, 13},
> +	/* DSC v1.2 YUV420 */
> +	{2, 4, 5, 6, 7, 7, 7, 8, 8, 9, 9, 9, 9, 10, 12},
> +	{2, 5, 7, 8, 9, 10, 11, 12, 12, 13, 13, 13, 13, 14, 15},
> +	{2, 5, 5, 6, 6, 7, 7, 8, 9, 9, 10, 11, 11, 12, 13},
> +	};
> +
> +/*
> + * Rate control - bpg offset values for each ratio type in dpu_dsc_ratio_type
> + */
> +static char dpu_dsc_rc_range_bpg[DSC_RATIO_TYPE_MAX][DSC_NUM_BUF_RANGES] = {
> +	/* DSC v1.1 */
> +	{2, 0, 0, -2, -4, -6, -8, -8, -8, -10, -10, -12, -12, -12, -12},
> +	{2, 0, 0, -2, -4, -6, -8, -8, -8, -10, -10, -12, -12, -12, -12},
> +	{2, 0, 0, -2, -4, -6, -8, -8, -8, -10, -10, -10, -12, -12, -12},
> +	/* DSC v1.1 SCR and DSC V1.2 RGB 444 */
> +	{2, 0, 0, -2, -4, -6, -8, -8, -8, -10, -10, -12, -12, -12, -12},
> +	{2, 0, 0, -2, -4, -6, -8, -8, -8, -10, -10, -12, -12, -12, -12},
> +	{2, 0, 0, -2, -4, -6, -8, -8, -8, -10, -10, -10, -12, -12, -12},
> +	/* DSC v1.2 YUV422 */
> +	{2, 0, 0, -2, -4, -6, -8, -8, -8, -10, -10, -10, -12, -12, -12},
> +	{2, 0, 0, -2, -4, -6, -8, -8, -8, -10, -10, -12, -12, -12, -12},
> +	{2, 0, 0, -2, -4, -6, -8, -8, -8, -10, -10, -10, -12, -12, -12},
> +	{10, 8, 6, 4, 2, 0, -2, -4, -6, -8, -10, -10, -12, -12, -12},
> +	/* DSC v1.2 YUV420 */
> +	{2, 0, 0, -2, -4, -6, -8, -8, -8, -10, -10, -12, -12, -12, -12},
> +	{2, 0, 0, -2, -4, -6, -8, -8, -8, -10, -10, -12, -12, -12, -12},
> +	{10, 8, 6, 4, 2, 0, -2, -4, -6, -8, -10, -10, -12, -12, -12},
> +};
> +
> +static struct dpu_dsc_rc_init_params_lut {
> +	u32 rc_quant_incr_limit0;
> +	u32 rc_quant_incr_limit1;
> +	u32 initial_fullness_offset;
> +	u32 initial_xmit_delay;
> +	u32 second_line_bpg_offset;
> +	u32 second_line_offset_adj;
> +	u32 flatness_min_qp;
> +	u32 flatness_max_qp;
> +}  dpu_dsc_rc_init_param_lut[] = {
> +	/* DSC v1.1 */
> +	{11, 11, 6144, 512, 0, 0, 3, 12}, /* DSC_V11_8BPC_8BPP */
> +	{15, 15, 6144, 512, 0, 0, 7, 16}, /* DSC_V11_10BPC_8BPP */
> +	{15, 15, 5632, 410, 0, 0, 7, 16}, /* DSC_V11_10BPC_10BPP */
> +	/* DSC v1.1 SCR and DSC v1.2 RGB 444 */
> +	{11, 11, 6144, 512, 0, 0, 3, 12}, /* DSC_V12_444_8BPC_8BPP or DSC_V11_SCR1_8BPC_8BPP */
> +	{15, 15, 6144, 512, 0, 0, 7, 16}, /* DSC_V12_444_10BPC_8BPP or DSC_V11_SCR1_10BPC_8BPP */
> +	{15, 15, 5632, 410, 0, 0, 7, 16}, /* DSC_V12_444_10BPC_10BPP or DSC_V11_SCR1_10BPC_10BPP */
> +	/* DSC v1.2 YUV422 */
> +	{11, 11, 5632, 410, 0, 0, 3, 12}, /* DSC_V12_422_8BPC_7BPP */
> +	{11, 11, 2048, 341, 0, 0, 3, 12}, /* DSC_V12_422_8BPC_8BPP */
> +	{15, 15, 5632, 410, 0, 0, 7, 16}, /* DSC_V12_422_10BPC_7BPP */
> +	{15, 15, 2048, 273, 0, 0, 7, 16}, /* DSC_V12_422_10BPC_10BPP */
> +	/* DSC v1.2 YUV420 */
> +	{11, 11, 5632, 410, 0, 0, 3, 12},    /* DSC_V12_422_8BPC_7BPP */
> +	{11, 11, 2048, 341, 12, 512, 3, 12}, /* DSC_V12_420_8BPC_6BPP */
> +	{15, 15, 2048, 341, 12, 512, 7, 16}, /* DSC_V12_420_10BPC_6BPP */
> +	{15, 15, 2048, 256, 12, 512, 7, 16}, /* DSC_V12_420_10BPC_7_5BPP */
> +};
> +
> +/**
> + * Maps to lookup the dpu_dsc_ratio_type index used in rate control tables
> + */
> +static struct dpu_dsc_table_index_lut {
> +	u32 fmt;
> +	u32 scr_ver;
> +	u32 minor_ver;
> +	u32 bpc;
> +	u32 bpp;
> +	u32 type;
> +} dpu_dsc_index_map[] = {
> +	/* DSC 1.1 formats - scr version is considered */
> +	{MSM_CHROMA_444, 0, 1, 8, 8, DSC_V11_8BPC_8BPP},
> +	{MSM_CHROMA_444, 0, 1, 10, 8, DSC_V11_10BPC_8BPP},
> +	{MSM_CHROMA_444, 0, 1, 10, 10, DSC_V11_10BPC_10BPP},
> +
> +	{MSM_CHROMA_444, 1, 1, 8, 8, DSC_V11_SCR1_8BPC_8BPP},
> +	{MSM_CHROMA_444, 1, 1, 10, 8, DSC_V11_SCR1_10BPC_8BPP},
> +	{MSM_CHROMA_444, 1, 1, 10, 10, DSC_V11_SCR1_10BPC_10BPP},
> +
> +	/* DSC 1.2 formats - scr version is no-op */
> +	{MSM_CHROMA_444, -1, 2, 8, 8, DSC_V12_444_8BPC_8BPP},
> +	{MSM_CHROMA_444, -1, 2, 10, 8, DSC_V12_444_10BPC_8BPP},
> +	{MSM_CHROMA_444, -1, 2, 10, 10, DSC_V12_444_10BPC_10BPP},
> +
> +	{MSM_CHROMA_422, -1, 2, 8, 7, DSC_V12_422_8BPC_7BPP},
> +	{MSM_CHROMA_422, -1, 2, 8, 8, DSC_V12_422_8BPC_8BPP},
> +	{MSM_CHROMA_422, -1, 2, 10, 7, DSC_V12_422_10BPC_7BPP},
> +	{MSM_CHROMA_422, -1, 2, 10, 10, DSC_V12_422_10BPC_10BPP},
> +
> +	{MSM_CHROMA_420, -1, 2, 8, 6, DSC_V12_420_8BPC_6BPP},
> +	{MSM_CHROMA_420, -1, 2, 10, 6, DSC_V12_420_10BPC_6BPP},
> +};
> +
> +static int _get_rc_table_index(struct drm_dsc_config *dsc, int scr_ver)
> +{
> +	u32 bpp, bpc, i, fmt = MSM_CHROMA_444;
> +
> +	if (dsc->dsc_version_major != 0x1) {
> +		DPU_ERROR("unsupported major version %d\n",
> +				dsc->dsc_version_major);
> +		return -EINVAL;
> +	}
> +
> +	bpc = dsc->bits_per_component;
> +	bpp = DSC_BPP(*dsc);
> +
> +	if (dsc->native_422)
> +		fmt = MSM_CHROMA_422;
> +	else if (dsc->native_420)
> +		fmt = MSM_CHROMA_420;
> +
> +
> +	for (i = 0; i < ARRAY_SIZE(dpu_dsc_index_map); i++) {
> +		if (dsc->dsc_version_minor == dpu_dsc_index_map[i].minor_ver &&
> +				fmt ==  dpu_dsc_index_map[i].fmt &&
> +				bpc == dpu_dsc_index_map[i].bpc &&
> +				bpp == dpu_dsc_index_map[i].bpp &&
> +				(dsc->dsc_version_minor != 0x1 ||
> +					scr_ver == dpu_dsc_index_map[i].scr_ver))
> +			return dpu_dsc_index_map[i].type;
> +	}
> +
> +	DPU_ERROR("unsupported DSC v%d.%dr%d, bpc:%d, bpp:%d, fmt:0x%x\n",
> +			dsc->dsc_version_major, dsc->dsc_version_minor,
> +			scr_ver, bpc, bpp, fmt);
> +	return -EINVAL;
> +}
> +
> +u8 _get_dsc_v1_2_bpg_offset(struct drm_dsc_config *dsc)
> +{
> +	u8 bpg_offset = 0;
> +	u8 uncompressed_bpg_rate;
> +	u8 bpp = DSC_BPP(*dsc);
> +
> +	if (dsc->slice_height < 8)
> +		bpg_offset = 2 * (dsc->slice_height - 1);
> +	else if (dsc->slice_height < 20)
> +		bpg_offset = 12;
> +	else if (dsc->slice_height <= 30)
> +		bpg_offset = 13;
> +	else if (dsc->slice_height < 42)
> +		bpg_offset = 14;
> +	else
> +		bpg_offset = 15;
> +
> +	if (dsc->native_422)
> +		uncompressed_bpg_rate = 3 * bpp * 4;
> +	else if (dsc->native_420)
> +		uncompressed_bpg_rate = 3 * bpp;
> +	else
> +		uncompressed_bpg_rate = (3 * bpp + 2) * 3;
> +
> +	if (bpg_offset < (uncompressed_bpg_rate - (3 * bpp)))
> +		return bpg_offset;
> +	else
> +		return (uncompressed_bpg_rate - (3 * bpp));
> +}
> +
> +int dpu_dsc_populate_dsc_config(struct drm_dsc_config *dsc, int scr_ver)

We just got rid of this /wrong/ downstream gunk in [1] in favour of the
upstreamed drm_dsc_compute_rc_parameters(), don't add it back.

[1]: https://lore.kernel.org/linux-arm-msm/20221026182824.876933-7-marijn.suijten@somainline.org/

> +{
> +	int bpp, bpc;
> +	int groups_per_line, groups_total;
> +	int min_rate_buffer_size;
> +	int hrd_delay;
> +	int pre_num_extra_mux_bits, num_extra_mux_bits;
> +	int slice_bits;
> +	int data;
> +	int final_value, final_scale;
> +	struct dpu_dsc_rc_init_params_lut *rc_param_lut;
> +	u32 slice_width_mod;
> +	int i, ratio_idx;
> +
> +	dsc->rc_model_size = 8192;
> +
> +	if ((dsc->dsc_version_major == 0x1) &&
> +			(dsc->dsc_version_minor == 0x1)) {
> +		if (scr_ver == 0x1)
> +			dsc->first_line_bpg_offset = 15;
> +		else
> +			dsc->first_line_bpg_offset = 12;
> +	} else if (dsc->dsc_version_minor == 0x2) {
> +		dsc->first_line_bpg_offset = _get_dsc_v1_2_bpg_offset(dsc);
> +	}
> +
> +	dsc->rc_edge_factor = 6;
> +	dsc->rc_tgt_offset_high = 3;
> +	dsc->rc_tgt_offset_low = 3;
> +	dsc->simple_422 = 0;
> +	dsc->convert_rgb = !(dsc->native_422 | dsc->native_420);
> +	dsc->vbr_enable = 0;
> +
> +	bpp = DSC_BPP(*dsc);
> +	bpc = dsc->bits_per_component;
> +
> +	ratio_idx = _get_rc_table_index(dsc, scr_ver);
> +	if ((ratio_idx < 0) || (ratio_idx >= DSC_RATIO_TYPE_MAX))
> +		return -EINVAL;
> +
> +
> +	for (i = 0; i < DSC_NUM_BUF_RANGES - 1; i++)
> +		dsc->rc_buf_thresh[i] = dpu_dsc_rc_buf_thresh[i];
> +
> +	for (i = 0; i < DSC_NUM_BUF_RANGES; i++) {
> +		dsc->rc_range_params[i].range_min_qp =
> +			dpu_dsc_rc_range_min_qp[ratio_idx][i];
> +		dsc->rc_range_params[i].range_max_qp =
> +			dpu_dsc_rc_range_max_qp[ratio_idx][i];
> +		dsc->rc_range_params[i].range_bpg_offset =
> +			dpu_dsc_rc_range_bpg[ratio_idx][i];
> +	}
> +
> +	rc_param_lut = &dpu_dsc_rc_init_param_lut[ratio_idx];
> +	dsc->rc_quant_incr_limit0 = rc_param_lut->rc_quant_incr_limit0;
> +	dsc->rc_quant_incr_limit1 = rc_param_lut->rc_quant_incr_limit1;
> +	dsc->initial_offset = rc_param_lut->initial_fullness_offset;
> +	dsc->initial_xmit_delay = rc_param_lut->initial_xmit_delay;
> +	dsc->second_line_bpg_offset = rc_param_lut->second_line_bpg_offset;
> +	dsc->second_line_offset_adj = rc_param_lut->second_line_offset_adj;
> +	dsc->flatness_min_qp = rc_param_lut->flatness_min_qp;
> +	dsc->flatness_max_qp = rc_param_lut->flatness_max_qp;
> +
> +	slice_width_mod = dsc->slice_width;
> +	if (dsc->native_422 || dsc->native_420) {
> +		slice_width_mod = dsc->slice_width / 2;
> +		bpp = bpp * 2;
> +	}
> +
> +	dsc->line_buf_depth = bpc + 1;
> +	dsc->mux_word_size = bpc > 10 ? DSC_MUX_WORD_SIZE_12_BPC : DSC_MUX_WORD_SIZE_8_10_BPC;
> +
> +	if ((dsc->dsc_version_minor == 0x2) && (dsc->native_420))
> +		dsc->nsl_bpg_offset = (2048 * (DIV_ROUND_UP(dsc->second_line_bpg_offset,
> +				(dsc->slice_height - 1))));
> +
> +	groups_per_line = DIV_ROUND_UP(slice_width_mod, 3);
> +
> +	dsc->slice_chunk_size = slice_width_mod * bpp / 8;
> +	if ((slice_width_mod * bpp) % 8)
> +		dsc->slice_chunk_size++;

Besides this code being completely superfluous, please familiarize
yourself with prior DSC contributions and review, and don't add (back)
patterns that were rejected or cleaned up.

- Marijn

> +
> +	/* rbs-min */
> +	min_rate_buffer_size =  dsc->rc_model_size - dsc->initial_offset +
> +			dsc->initial_xmit_delay * bpp +
> +			groups_per_line * dsc->first_line_bpg_offset;
> +
> +	hrd_delay = DIV_ROUND_UP(min_rate_buffer_size, bpp);
> +
> +	dsc->initial_dec_delay = hrd_delay - dsc->initial_xmit_delay;
> +
> +	dsc->initial_scale_value = 8 * dsc->rc_model_size /
> +			(dsc->rc_model_size - dsc->initial_offset);
> +
> +	slice_bits = 8 * dsc->slice_chunk_size * dsc->slice_height;
> +
> +	groups_total = groups_per_line * dsc->slice_height;
> +
> +	data = dsc->first_line_bpg_offset * 2048;
> +
> +	dsc->nfl_bpg_offset = DIV_ROUND_UP(data, (dsc->slice_height - 1));
> +
> +	if (dsc->native_422)
> +		pre_num_extra_mux_bits = 4 * dsc->mux_word_size + (4 * bpc + 4) + (3 * 4 * bpc) - 2;
> +	else if (dsc->native_420)
> +		pre_num_extra_mux_bits = 3 * dsc->mux_word_size + (4 * bpc + 4) + (2 * 4 * bpc) - 2;
> +	else
> +		pre_num_extra_mux_bits = 3 * (dsc->mux_word_size + (4 * bpc + 4) - 2);
> +
> +	num_extra_mux_bits = pre_num_extra_mux_bits - (dsc->mux_word_size -
> +		((slice_bits - pre_num_extra_mux_bits) % dsc->mux_word_size));
> +
> +	data = 2048 * (dsc->rc_model_size - dsc->initial_offset
> +		+ num_extra_mux_bits);
> +	dsc->slice_bpg_offset = DIV_ROUND_UP(data, groups_total);
> +
> +	data = dsc->initial_xmit_delay * bpp;
> +	final_value =  dsc->rc_model_size - data + num_extra_mux_bits;
> +
> +	final_scale = 8 * dsc->rc_model_size /
> +		(dsc->rc_model_size - final_value);
> +
> +	dsc->final_offset = final_value;
> +
> +	data = (final_scale - 9) * (dsc->nfl_bpg_offset +
> +		dsc->slice_bpg_offset);
> +	dsc->scale_increment_interval = (2048 * dsc->final_offset) / data;
> +
> +	dsc->scale_decrement_interval = groups_per_line /
> +		(dsc->initial_scale_value - 8);
> +
> +	return 0;
> +}
> +
> +bool dpu_dsc_ich_reset_override_needed(bool pu_en,
> +		struct msm_display_dsc_info *dsc_info)
> +{
> +	/*
> +	 * As per the DSC spec, ICH_RESET can be either end of the slice line
> +	 * or at the end of the slice. HW internally generates ich_reset at
> +	 * end of the slice line if DSC_MERGE is used or encoder has two
> +	 * soft slices. However, if encoder has only 1 soft slice and DSC_MERGE
> +	 * is not used then it will generate ich_reset at the end of slice.
> +	 *
> +	 * Now as per the spec, during one PPS session, position where
> +	 * ich_reset is generated should not change. Now if full-screen frame
> +	 * has more than 1 soft slice then HW will automatically generate
> +	 * ich_reset at the end of slice_line. But for the same panel, if
> +	 * partial frame is enabled and only 1 encoder is used with 1 slice,
> +	 * then HW will generate ich_reset at end of the slice. This is a
> +	 * mismatch. Prevent this by overriding HW's decision.
> +	 */
> +	return pu_en && dsc_info && (dsc_info->drm_dsc.slice_count > 1) &&
> +		(dsc_info->drm_dsc.slice_width == dsc_info->drm_dsc.pic_width);
> +}
> +
> +int dpu_dsc_initial_line_calc(struct msm_display_dsc_info *dsc_info,
> +				int enc_ip_width, int dsc_cmn_mode)
> +{
> +	int max_ssm_delay, max_se_size, max_muxword_size;
> +	int compress_bpp_group, obuf_latency, input_ssm_out_latency;
> +	int base_hs_latency, chunk_bits, ob_data_width;
> +	int output_rate_extra_budget_bits, multi_hs_extra_budget_bits;
> +	int multi_hs_extra_latency,  mux_word_size;
> +	int ob_data_width_4comps, ob_data_width_3comps;
> +	int output_rate_ratio_complement, container_slice_width;
> +	int rtl_num_components, multi_hs_c, multi_hs_d;
> +
> +	int bpc = dsc_info->drm_dsc.bits_per_component;
> +	int bpp = DSC_BPP(dsc_info->drm_dsc);
> +	bool native_422 = dsc_info->drm_dsc.native_422;
> +	bool native_420 = dsc_info->drm_dsc.native_420;
> +
> +	/* Hardent core config */
> +	int multiplex_mode_enable = 0, split_panel_enable = 0;
> +	int rtl_max_bpc = 10, rtl_output_data_width = 64;
> +	int pipeline_latency = 28;
> +
> +	if (dsc_cmn_mode & DSC_MODE_MULTIPLEX)
> +		multiplex_mode_enable = 1;
> +	if (dsc_cmn_mode & DSC_MODE_SPLIT_PANEL)
> +		split_panel_enable = 1;
> +	container_slice_width = (native_422 ?
> +			dsc_info->drm_dsc.slice_width / 2 : dsc_info->drm_dsc.slice_width);
> +	max_muxword_size = (rtl_max_bpc >= 12) ? 64 : 48;
> +	max_se_size = 4 * (rtl_max_bpc + 1);
> +	max_ssm_delay = max_se_size + max_muxword_size - 1;
> +	mux_word_size = (bpc >= 12) ? 64 : 48;
> +	compress_bpp_group = native_422 ? (2 * bpp) : bpp;
> +	input_ssm_out_latency = pipeline_latency + 3 * (max_ssm_delay + 2)
> +			* dsc_info->num_active_ss_per_enc;
> +	rtl_num_components = (native_420 || native_422) ? 4 : 3;
> +	ob_data_width_4comps = (rtl_output_data_width >= (2 *
> +			max_muxword_size)) ?
> +			rtl_output_data_width :
> +			(2 * rtl_output_data_width);
> +	ob_data_width_3comps = (rtl_output_data_width >= max_muxword_size) ?
> +			rtl_output_data_width : 2 * rtl_output_data_width;
> +	ob_data_width = (rtl_num_components == 4) ?
> +			ob_data_width_4comps : ob_data_width_3comps;
> +	obuf_latency = DIV_ROUND_UP((9 * ob_data_width + mux_word_size),
> +			compress_bpp_group) + 1;
> +	base_hs_latency = dsc_info->drm_dsc.initial_xmit_delay +
> +		input_ssm_out_latency + obuf_latency;
> +	chunk_bits = 8 * dsc_info->drm_dsc.slice_chunk_size;
> +	output_rate_ratio_complement = ob_data_width - compress_bpp_group;
> +	output_rate_extra_budget_bits =
> +		(output_rate_ratio_complement * chunk_bits) >>
> +		((ob_data_width == 128) ? 7 : 6);
> +	multi_hs_c = split_panel_enable * multiplex_mode_enable;
> +	multi_hs_d = (dsc_info->num_active_ss_per_enc > 1) * (ob_data_width > compress_bpp_group);
> +	multi_hs_extra_budget_bits = multi_hs_c ?
> +				chunk_bits : (multi_hs_d ? chunk_bits :
> +					output_rate_extra_budget_bits);
> +	multi_hs_extra_latency = DIV_ROUND_UP(multi_hs_extra_budget_bits,
> +			compress_bpp_group);
> +	dsc_info->initial_lines = DIV_ROUND_UP((base_hs_latency +
> +				multi_hs_extra_latency),
> +			container_slice_width);
> +
> +	return 0;
> +}
> +
> +int dpu_dsc_populate_dsc_private_params(struct msm_display_dsc_info *dsc_info,
> +					int intf_width)
> +{
> +	int  mod_offset;
> +	int slice_per_pkt, slice_per_intf;
> +	int bytes_in_slice, total_bytes_per_intf;
> +	u16 bpp;
> +	u32 bytes_in_dsc_pair;
> +	u32 total_bytes_in_dsc_pair;
> +
> +	if (!dsc_info || !dsc_info->drm_dsc.slice_width ||
> +			!dsc_info->drm_dsc.slice_height ||
> +			intf_width < dsc_info->drm_dsc.slice_width) {
> +		DPU_ERROR("invalid input, intf_width=%d slice_width=%d\n",
> +			intf_width, dsc_info ? dsc_info->drm_dsc.slice_width :
> +			-1);
> +		return -EINVAL;
> +	}
> +
> +	mod_offset = dsc_info->drm_dsc.slice_width % 3;
> +
> +
> +	switch (mod_offset) {
> +	case 0:
> +		dsc_info->slice_last_group_size = 2;
> +		break;
> +	case 1:
> +		dsc_info->slice_last_group_size = 0;
> +		break;
> +	case 2:
> +		dsc_info->slice_last_group_size = 1;
> +		break;
> +	default:
> +		break;
> +	}
> +
> +	dsc_info->det_thresh_flatness =
> +		2 << (dsc_info->drm_dsc.bits_per_component - 8);
> +
> +	slice_per_pkt = dsc_info->slice_per_pkt;
> +	slice_per_intf = DIV_ROUND_UP(intf_width,
> +			dsc_info->drm_dsc.slice_width);
> +
> +
> +	/*
> +	 * If slice_per_pkt is greater than slice_per_intf then default to 1.
> +	 * This can happen during partial update.
> +	 */
> +	if (slice_per_pkt > slice_per_intf)
> +		slice_per_pkt = 1;
> +
> +	bpp = DSC_BPP(dsc_info->drm_dsc);
> +	bytes_in_slice = DIV_ROUND_UP(dsc_info->drm_dsc.slice_width * bpp, 8);
> +	total_bytes_per_intf = bytes_in_slice * slice_per_intf;
> +
> +
> +	dsc_info->eol_byte_num = total_bytes_per_intf % 3;
> +	dsc_info->pclk_per_line =  DIV_ROUND_UP(total_bytes_per_intf, 3);
> +	dsc_info->bytes_in_slice = bytes_in_slice;
> +	dsc_info->bytes_per_pkt = bytes_in_slice * slice_per_pkt;
> +	dsc_info->pkt_per_line = slice_per_intf / slice_per_pkt;
> +
> +
> +	bytes_in_dsc_pair = DIV_ROUND_UP(bytes_in_slice * 2, 3);
> +	if (bytes_in_dsc_pair % 8) {
> +		dsc_info->dsc_4hsmerge_padding = 8 - (bytes_in_dsc_pair % 8);
> +		total_bytes_in_dsc_pair = bytes_in_dsc_pair +
> +				dsc_info->dsc_4hsmerge_padding;
> +		if (total_bytes_in_dsc_pair % 16)
> +			dsc_info->dsc_4hsmerge_alignment = 16 -
> +					(total_bytes_in_dsc_pair % 16);
> +	}
> +
> +	return 0;
> +}
> +
> diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_dsc_helper.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_dsc_helper.h
> new file mode 100644
> index 00000000..9f26455
> --- /dev/null
> +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_dsc_helper.h
> @@ -0,0 +1,25 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Copyright (c) 2020 - 2023 The Linux Foundation. All rights reserved.
> + * Copyright (c) 2023. Qualcomm Innovation Center, Inc. All rights reserved
> + */
> +
> +#ifndef __DPU_DSC_HELPER_H__
> +#define __DPU_DSC_HELPER_H__
> +
> +#include "msm_drv.h"
> +
> +#define DSC_1_1_PPS_PARAMETER_SET_ELEMENTS   88
> +
> +int dpu_dsc_populate_dsc_config(struct drm_dsc_config *dsc, int scr_ver);
> +
> +int dpu_dsc_populate_dsc_private_params(struct msm_display_dsc_info *dsc_info,
> +					int intf_width);
> +
> +bool dpu_dsc_ich_reset_override_needed(bool pu_en, struct msm_display_dsc_info *dsc);
> +
> +int dpu_dsc_initial_line_calc(struct msm_display_dsc_info *dsc,
> +				int enc_ip_width, int dsc_cmn_mode);
> +
> +#endif /* __DPU_DSC_HELPER_H__ */
> +
> diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
> index f155803..cf4eb8d 100644
> --- a/drivers/gpu/drm/msm/msm_drv.h
> +++ b/drivers/gpu/drm/msm/msm_drv.h
> @@ -57,6 +57,10 @@ struct msm_disp_state;
>  #define MAX_CRTCS      8
>  #define MAX_BRIDGES    8
>  
> +#define MSM_CHROMA_444 0x0
> +#define MSM_CHROMA_422 0x1
> +#define MSM_CHROMA_420 0x2
> +
>  #define FRAC_16_16(mult, div)    (((mult) << 16) / (div))
>  
>  enum msm_dp_controller {
> -- 
> The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
> a Linux Foundation Collaborative Project
> 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ