[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <5d15a2ce-3f2e-6097-88cd-3d61b201bc4c@quicinc.com>
Date: Fri, 8 Dec 2023 14:48:38 -0800
From: Abhinav Kumar <quic_abhinavk@...cinc.com>
To: Dmitry Baryshkov <dmitry.baryshkov@...aro.org>
CC: <freedreno@...ts.freedesktop.org>, <linux-arm-msm@...r.kernel.org>,
<linux-kernel@...r.kernel.org>, <dri-devel@...ts.freedesktop.org>,
<quic_parellan@...cinc.com>, <quic_jesszhan@...cinc.com>,
Marijn Suijten <marijn.suijten@...ainline.org>,
Sean Paul <sean@...rly.run>
Subject: Re: [PATCH v2 12/16] drm/msm/dpu: add an API to setup the CDM block
for writeback
On 12/8/2023 12:55 PM, Dmitry Baryshkov wrote:
> On Fri, 8 Dec 2023 at 19:28, Abhinav Kumar <quic_abhinavk@...cinc.com> wrote:
>> On 12/8/2023 3:52 AM, Dmitry Baryshkov wrote:
>>> On Fri, 8 Dec 2023 at 07:07, Abhinav Kumar <quic_abhinavk@...cinc.com> wrote:
>>>>
>>>> Add an API dpu_encoder_helper_phys_setup_cdm() which can be used by
>>>> the writeback encoder to setup the CDM block.
>>>>
>>>> Currently, this is defined and used within the writeback's physical
>>>> encoder layer however, the function can be modified to be used to setup
>>>> the CDM block even for non-writeback interfaces.
>>>>
>>>> Until those modifications are planned and made, keep it local to
>>>> writeback.
>>>>
>>>> changes in v2:
>>>> - add the RGB2YUV CSC matrix to dpu util as needed by CDM
>>>> - use dpu_hw_get_csc_cfg() to get and program CSC
>>>> - drop usage of setup_csc_data() and setup_cdwn() cdm ops
>>>> as they both have been merged into enable()
>>>> - drop reduntant hw_cdm and hw_pp checks
>>>>
>>>> Signed-off-by: Abhinav Kumar <quic_abhinavk@...cinc.com>
>>>> ---
>>>> .../gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h | 3 +
>>>> .../drm/msm/disp/dpu1/dpu_encoder_phys_wb.c | 96 ++++++++++++++++++-
>>>> drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c | 17 ++++
>>>> drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h | 1 +
>>>> 4 files changed, 116 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h
>>>> index 410f6225789c..1d6d1eb642b9 100644
>>>> --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h
>>>> +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h
>>>> @@ -16,6 +16,7 @@
>>>> #include "dpu_hw_pingpong.h"
>>>> #include "dpu_hw_ctl.h"
>>>> #include "dpu_hw_top.h"
>>>> +#include "dpu_hw_cdm.h"
>>>> #include "dpu_encoder.h"
>>>> #include "dpu_crtc.h"
>>>>
>>>> @@ -210,6 +211,7 @@ static inline int dpu_encoder_phys_inc_pending(struct dpu_encoder_phys *phys)
>>>> * @wbirq_refcount: Reference count of writeback interrupt
>>>> * @wb_done_timeout_cnt: number of wb done irq timeout errors
>>>> * @wb_cfg: writeback block config to store fb related details
>>>> + * @cdm_cfg: cdm block config needed to store writeback block's CDM configuration
>>>> * @wb_conn: backpointer to writeback connector
>>>> * @wb_job: backpointer to current writeback job
>>>> * @dest: dpu buffer layout for current writeback output buffer
>>>> @@ -219,6 +221,7 @@ struct dpu_encoder_phys_wb {
>>>> atomic_t wbirq_refcount;
>>>> int wb_done_timeout_cnt;
>>>> struct dpu_hw_wb_cfg wb_cfg;
>>>> + struct dpu_hw_cdm_cfg cdm_cfg;
>>>> struct drm_writeback_connector *wb_conn;
>>>> struct drm_writeback_job *wb_job;
>>>> struct dpu_hw_fmt_layout dest;
>>>> diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c
>>>> index 4665367cf14f..85429c62d727 100644
>>>> --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c
>>>> +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c
>>>> @@ -259,6 +259,99 @@ static void dpu_encoder_phys_wb_setup_ctl(struct dpu_encoder_phys *phys_enc)
>>>> }
>>>> }
>>>>
>>>> +/**
>>>> + * dpu_encoder_phys_wb_setup_cdp - setup chroma down sampling block
>>>> + * @phys_enc:Pointer to physical encoder
>>>> + */
>>>> +static void dpu_encoder_helper_phys_setup_cdm(struct dpu_encoder_phys *phys_enc)
>>>> +{
>>>> + struct dpu_hw_cdm *hw_cdm;
>>>> + struct dpu_hw_cdm_cfg *cdm_cfg;
>>>> + struct dpu_hw_pingpong *hw_pp;
>>>> + struct dpu_encoder_phys_wb *wb_enc;
>>>> + const struct msm_format *format;
>>>> + const struct dpu_format *dpu_fmt;
>>>> + struct drm_writeback_job *wb_job;
>>>> + int ret;
>>>> +
>>>> + if (!phys_enc)
>>>> + return;
>>>> +
>>>> + wb_enc = to_dpu_encoder_phys_wb(phys_enc);
>>>> + cdm_cfg = &wb_enc->cdm_cfg;
>>>> + hw_pp = phys_enc->hw_pp;
>>>> + hw_cdm = phys_enc->hw_cdm;
>>>> + wb_job = wb_enc->wb_job;
>>>> +
>>>> + format = msm_framebuffer_format(wb_enc->wb_job->fb);
>>>> + dpu_fmt = dpu_get_dpu_format_ext(format->pixel_format, wb_job->fb->modifier);
>>>> +
>>>> + if (!hw_cdm)
>>>> + return;
>>>> +
>>>> + if (!DPU_FORMAT_IS_YUV(dpu_fmt)) {
>>>> + DPU_DEBUG("[enc:%d] cdm_disable fmt:%x\n", DRMID(phys_enc->parent),
>>>> + dpu_fmt->base.pixel_format);
>>>> + if (hw_cdm->ops.disable)
>>>> + hw_cdm->ops.disable(hw_cdm);
>>>> +
>>>> + return;
>>>> + }
>>>> +
>>>> + memset(cdm_cfg, 0, sizeof(struct dpu_hw_cdm_cfg));
>>>> +
>>>> + cdm_cfg->output_width = wb_job->fb->width;
>>>> + cdm_cfg->output_height = wb_job->fb->height;
>>>> + cdm_cfg->output_fmt = dpu_fmt;
>>>> + cdm_cfg->output_type = CDM_CDWN_OUTPUT_WB;
>>>> + cdm_cfg->output_bit_depth = DPU_FORMAT_IS_DX(dpu_fmt) ?
>>>> + CDM_CDWN_OUTPUT_10BIT : CDM_CDWN_OUTPUT_8BIT;
>>>> + cdm_cfg->csc_cfg = dpu_hw_get_csc_cfg(DPU_HW_RGB2YUV_601L_10BIT);
>>>> + if (!cdm_cfg->csc_cfg) {
>>>> + DPU_ERROR("valid csc not found\n");
>>>> + return;
>>>> + }
>>>> +
>>>> + /* enable 10 bit logic */
>>>> + switch (cdm_cfg->output_fmt->chroma_sample) {
>>>> + case DPU_CHROMA_RGB:
>>>> + cdm_cfg->h_cdwn_type = CDM_CDWN_DISABLE;
>>>> + cdm_cfg->v_cdwn_type = CDM_CDWN_DISABLE;
>>>> + break;
>>>> + case DPU_CHROMA_H2V1:
>>>> + cdm_cfg->h_cdwn_type = CDM_CDWN_COSITE;
>>>> + cdm_cfg->v_cdwn_type = CDM_CDWN_DISABLE;
>>>> + break;
>>>> + case DPU_CHROMA_420:
>>>> + cdm_cfg->h_cdwn_type = CDM_CDWN_COSITE;
>>>> + cdm_cfg->v_cdwn_type = CDM_CDWN_OFFSITE;
>>>> + break;
>>>> + case DPU_CHROMA_H1V2:
>>>> + default:
>>>> + DPU_ERROR("[enc:%d] unsupported chroma sampling type\n",
>>>> + DRMID(phys_enc->parent));
>>>> + cdm_cfg->h_cdwn_type = CDM_CDWN_DISABLE;
>>>> + cdm_cfg->v_cdwn_type = CDM_CDWN_DISABLE;
>>>
>>> If it is unsupported, we should return an error here.
>>>
>>
>> The caller of this API and the caller of the API even before that do not
>> have error checking as they are all void. Disabling CDWN is the
>> appropriate corrective action for this case and should be sufficient.
>
> Ack. Could you please document that DPU_CHROMA_H1V2 is invalid for this API?
>
>>
>>>> + break;
>>>> + }
>>>> +
>>>> + DPU_DEBUG("[enc:%d] cdm_enable:%d,%d,%X,%d,%d,%d,%d]\n",
>>>> + DRMID(phys_enc->parent), cdm_cfg->output_width,
>>>> + cdm_cfg->output_height, cdm_cfg->output_fmt->base.pixel_format,
>>>> + cdm_cfg->output_type, cdm_cfg->output_bit_depth,
>>>> + cdm_cfg->h_cdwn_type, cdm_cfg->v_cdwn_type);
>>>> +
>>>> + if (hw_cdm->ops.enable) {
>>>> + cdm_cfg->pp_id = hw_pp->idx;
>>>> + ret = hw_cdm->ops.enable(hw_cdm, cdm_cfg);
>>>> + if (ret < 0) {
>>>> + DPU_ERROR("[enc:%d] failed to enable CDM; ret:%d\n",
>>>> + DRMID(phys_enc->parent), ret);
>>>> + return;
>>>> + }
>>>> + }
>>>> +}
>>>> +
>>>> /**
>>>> * dpu_encoder_phys_wb_atomic_check - verify and fixup given atomic states
>>>> * @phys_enc: Pointer to physical encoder
>>>> @@ -382,8 +475,9 @@ static void dpu_encoder_phys_wb_setup(
>>>>
>>>> dpu_encoder_phys_wb_setup_fb(phys_enc, fb);
>>>>
>>>> - dpu_encoder_phys_wb_setup_ctl(phys_enc);
>>>> + dpu_encoder_helper_phys_setup_cdm(phys_enc);
>>>>
>>>> + dpu_encoder_phys_wb_setup_ctl(phys_enc);
>>>> }
>>>>
>>>> /**
>>>> diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c
>>>> index 59a153331194..34143491aba2 100644
>>>> --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c
>>>> +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c
>>>> @@ -87,6 +87,8 @@ static u32 dpu_hw_util_log_mask = DPU_DBG_MASK_NONE;
>>>> #define QOS_QOS_CTRL_VBLANK_EN BIT(16)
>>>> #define QOS_QOS_CTRL_CREQ_VBLANK_MASK GENMASK(21, 20)
>>>>
>>>> +#define TO_S15D16(_x_)((_x_) << 7)
>>>
>>> Huh? I don't understand why it is shifted by 7. If you have data in
>>> S8.9 format, I'd say that it makes things less obvious compared to
>>> S15.16 (where you can perform division on the fly).
>>>
>>
>> I was referring to below comment and also because the values are in
>> S15.16 in
>>
>> https://git.codelinaro.org/clo/la/platform/vendor/opensource/display-drivers/-/blob/clo/main/msm/sde/sde_encoder_phys_wb.c?ref_type=heads#L35
>
> Yes, I saw that. My first thought was that <<7 looked
> counterintuitive. You can not perform maths easily with S4.9 values.
> At least I can not guess the actual value of 0x083 or 0xff37.
>
> I thought about writing data in S15.16 directly (e.g. 0x4180 instead
> of 0x083, 0x8100 instead of 0x102). This way it would be easier to
> understand that the first row is 65.5 / 256, 128 / 256, 25 / 256. And
> this is what we had for existing DPU CSC tables.
>
> But after looking at the dpu_hw_csc_setup() I actually fail to
> understand why we need the S15.16 values. The hardware works with S4.9
> in the end. So it looks pretty strange to convert S4.9 to S15.16 via
> the macro only to convert it back to S4.9 in the CSC setup code.
>
> Maybe we can go the following route:
> - Merge this series using CSC tables as is (with your TO_S15D16 macro)
> - Drop the <<7 from dpu_hw_csc_setup() (and drop the macro too)
> - Define S4P9(floor, nom, den) and S4P9_NEG(floor, nom, den) macros to
> be used for the matrix values
> - Merge the DPU tables with MDP tables (which use S4.9 already)
>
> I'd also like to check if we can drop two versions of clamp values
> (for 8bit and for 10bit) and convert those values on the fly.
>
Yes, I am fine with this approach. That will also allow me to
investigate the tables not just from WB context but from others as well.
>>
>> 428 struct dpu_csc_cfg {
>> 429 /* matrix coefficients in S15.16 format */
>> 430 uint32_t csc_mv[DPU_CSC_MATRIX_COEFF_SIZE];
>> 431 uint32_t csc_pre_bv[DPU_CSC_BIAS_SIZE];
>> 432 uint32_t csc_post_bv[DPU_CSC_BIAS_SIZE];
>> 433 uint32_t csc_pre_lv[DPU_CSC_CLAMP_SIZE];
>> 434 uint32_t csc_post_lv[DPU_CSC_CLAMP_SIZE];
>> 435 };
>> 436
>>
>>
>>>> +
>>>> static const struct dpu_csc_cfg dpu_csc_YUV2RGB_601L = {
>>>> {
>>>> /* S15.16 format */
>>>> @@ -117,6 +119,18 @@ static const struct dpu_csc_cfg dpu_csc10_YUV2RGB_601L = {
>>>> { 0x00, 0x3ff, 0x00, 0x3ff, 0x00, 0x3ff,},
>>>> };
>>>>
>>>> +static const struct dpu_csc_cfg dpu_csc10_rgb2yuv_601l = {
>>>> + {
>>>> + TO_S15D16(0x0083), TO_S15D16(0x0102), TO_S15D16(0x0032),
>>>> + TO_S15D16(0x1fb5), TO_S15D16(0x1f6c), TO_S15D16(0x00e1),
>>>> + TO_S15D16(0x00e1), TO_S15D16(0x1f45), TO_S15D16(0x1fdc)
>>>> + },
>>>> + { 0x00, 0x00, 0x00 },
>>>> + { 0x0040, 0x0200, 0x0200 },
>>>> + { 0x000, 0x3ff, 0x000, 0x3ff, 0x000, 0x3ff },
>>>> + { 0x040, 0x3ac, 0x040, 0x3c0, 0x040, 0x3c0 },
>>>> +};
>>>> +
>>>> /**
>>>> * dpu_hw_get_csc_cfg - get the CSC matrix based on the request type
>>>> * @type: type of the requested CSC matrix from caller
>>>> @@ -133,6 +147,9 @@ const struct dpu_csc_cfg *dpu_hw_get_csc_cfg(enum dpu_hw_csc_cfg_type type)
>>>> case DPU_HW_YUV2RGB_601L_10BIT:
>>>> csc_cfg = &dpu_csc10_YUV2RGB_601L;
>>>> break;
>>>> + case DPU_HW_RGB2YUV_601L_10BIT:
>>>> + csc_cfg = &dpu_csc10_rgb2yuv_601l;
>>>> + break;
>>>> default:
>>>> DPU_ERROR("unknown csc_cfg type\n");
>>>> break;
>>>> diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h
>>>> index 49f2bcf6de15..ed153d66f660 100644
>>>> --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h
>>>> +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h
>>>> @@ -22,6 +22,7 @@
>>>> enum dpu_hw_csc_cfg_type {
>>>> DPU_HW_YUV2RGB_601L,
>>>> DPU_HW_YUV2RGB_601L_10BIT,
>>>> + DPU_HW_RGB2YUV_601L_10BIT,
>>>> };
>>>>
>>>> /*
>>>> --
>>>> 2.40.1
>>>>
>>>
>>>
>
>
>
> --
> With best wishes
> Dmitry
Powered by blists - more mailing lists