lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CADaigPVbUZzo5c8fp9EqZsfrp44-09whu-HPk9c-eNc2KXS4Mg@mail.gmail.com>
Date:   Wed, 27 May 2020 09:44:56 -0700
From:   Eric Anholt <eric@...olt.net>
To:     Maxime Ripard <maxime@...no.tech>
Cc:     Nicolas Saenz Julienne <nsaenzjulienne@...e.de>,
        DRI Development <dri-devel@...ts.freedesktop.org>,
        linux-rpi-kernel@...ts.infradead.org,
        bcm-kernel-feedback-list@...adcom.com,
        linux-arm-kernel@...ts.infradead.org, linux-kernel@...r.kernel.org,
        Dave Stevenson <dave.stevenson@...pberrypi.com>,
        Tim Gover <tim.gover@...pberrypi.com>,
        Phil Elwell <phil@...pberrypi.com>
Subject: Re: [PATCH v3 016/105] drm/vc4: plane: Improve LBM usage

On Wed, May 27, 2020 at 8:49 AM Maxime Ripard <maxime@...no.tech> wrote:
>
> From: Dave Stevenson <dave.stevenson@...pberrypi.com>
>
> LBM allocations were always taking the worst case sizing of
> max(src_width, dst_width) * 16. This is significantly over
> the required sizing, and stops us rendering multiple 4k images
> to the screen.
>
> Add some of the additional constraints to more accurately
> describe the LBM requirements.
>
> Signed-off-by: Dave Stevenson <dave.stevenson@...pberrypi.com>
> Signed-off-by: Maxime Ripard <maxime@...no.tech>
> ---
>  drivers/gpu/drm/vc4/vc4_plane.c | 31 ++++++++++++++++++++-----------
>  1 file changed, 20 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c
> index 1575c05e3106..602927745f84 100644
> --- a/drivers/gpu/drm/vc4/vc4_plane.c
> +++ b/drivers/gpu/drm/vc4/vc4_plane.c
> @@ -142,9 +142,10 @@ static const struct hvs_format *vc4_get_hvs_format(u32 drm_format)
>         return NULL;
>  }
>
> -static enum vc4_scaling_mode vc4_get_scaling_mode(u32 src, u32 dst)
> +static enum vc4_scaling_mode vc4_get_scaling_mode(u32 src, u32 dst,
> +                                                 bool chroma_vrep)
>  {
> -       if (dst == src)
> +       if (dst == src && !chroma_vrep)
>                 return VC4_SCALING_NONE;
>         if (3 * dst >= 2 * src)
>                 return VC4_SCALING_PPF;
> @@ -369,9 +370,11 @@ static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state)
>                 return ret;
>
>         vc4_state->x_scaling[0] = vc4_get_scaling_mode(vc4_state->src_w[0],
> -                                                      vc4_state->crtc_w);
> +                                                      vc4_state->crtc_w,
> +                                                      false);
>         vc4_state->y_scaling[0] = vc4_get_scaling_mode(vc4_state->src_h[0],
> -                                                      vc4_state->crtc_h);
> +                                                      vc4_state->crtc_h,
> +                                                      false);
>
>         vc4_state->is_unity = (vc4_state->x_scaling[0] == VC4_SCALING_NONE &&
>                                vc4_state->y_scaling[0] == VC4_SCALING_NONE);
> @@ -384,10 +387,12 @@ static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state)
>
>                 vc4_state->x_scaling[1] =
>                         vc4_get_scaling_mode(vc4_state->src_w[1],
> -                                            vc4_state->crtc_w);
> +                                            vc4_state->crtc_w,
> +                                            v_subsample == 2);
>                 vc4_state->y_scaling[1] =
>                         vc4_get_scaling_mode(vc4_state->src_h[1],
> -                                            vc4_state->crtc_h);
> +                                            vc4_state->crtc_h,
> +                                            v_subsample == 2);
>
>                 /* YUV conversion requires that horizontal scaling be enabled
>                  * on the UV plane even if vc4_get_scaling_mode() returned

The change above isn't mentioned in the commit message and I don't
understand what's going on.  It should be split out with an
explanation.

> @@ -437,10 +442,7 @@ static void vc4_write_ppf(struct vc4_plane_state *vc4_state, u32 src, u32 dst)
>  static u32 vc4_lbm_size(struct drm_plane_state *state)
>  {
>         struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
> -       /* This is the worst case number.  One of the two sizes will
> -        * be used depending on the scaling configuration.
> -        */
> -       u32 pix_per_line = max(vc4_state->src_w[0], (u32)vc4_state->crtc_w);
> +       u32 pix_per_line;
>         u32 lbm;
>
>         /* LBM is not needed when there's no vertical scaling. */
> @@ -448,6 +450,11 @@ static u32 vc4_lbm_size(struct drm_plane_state *state)
>             vc4_state->y_scaling[1] == VC4_SCALING_NONE)
>                 return 0;
>
> +       if (vc4_state->x_scaling[0] == VC4_SCALING_TPZ)
> +               pix_per_line = vc4_state->crtc_w;
> +       else
> +               pix_per_line = vc4_state->src_w[0];

Looks like it's also crtc_w for RGB or 4:4:4 and HPPF in (0.5,1.0].
Maybe drop a note in here that we're not covering that case, but src_w
> crtc_w so it's safe at least.

> +
>         if (!vc4_state->is_yuv) {
>                 if (vc4_state->y_scaling[0] == VC4_SCALING_TPZ)
>                         lbm = pix_per_line * 8;
> @@ -583,7 +590,9 @@ static int vc4_plane_allocate_lbm(struct drm_plane_state *state)
>                 spin_lock_irqsave(&vc4->hvs->mm_lock, irqflags);
>                 ret = drm_mm_insert_node_generic(&vc4->hvs->lbm_mm,
>                                                  &vc4_state->lbm,
> -                                                lbm_size, 32, 0, 0);
> +                                                lbm_size,
> +                                                vc4->hvs->hvs5 ? 64 : 32,
> +                                                0, 0);
>                 spin_unlock_irqrestore(&vc4->hvs->mm_lock, irqflags);
>
>                 if (ret)
> --
> git-series 0.9.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ