[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20210318093142.GB18038@qmqm.qmqm.pl>
Date: Thu, 18 Mar 2021 10:31:42 +0100
From: Michał Mirosław <mirq-linux@...e.qmqm.pl>
To: Dmitry Osipenko <digetx@...il.com>
Cc: Thierry Reding <thierry.reding@...il.com>,
Jonathan Hunter <jonathanh@...dia.com>,
Matt Merhar <mattmerhar@...tonmail.com>,
Peter Geis <pgwipeout@...il.com>,
Nicolas Chauvet <kwizart@...il.com>,
linux-tegra@...r.kernel.org, linux-pm@...r.kernel.org,
linux-kernel@...r.kernel.org, dri-devel@...ts.freedesktop.org
Subject: Re: [PATCH v16 1/2] drm/tegra: dc: Support memory bandwidth
management
On Wed, Mar 17, 2021 at 09:57:33PM +0300, Dmitry Osipenko wrote:
[...]
> --- a/drivers/gpu/drm/tegra/dc.c
> +++ b/drivers/gpu/drm/tegra/dc.c
> @@ -8,6 +8,7 @@
> #include <linux/debugfs.h>
> #include <linux/delay.h>
> #include <linux/iommu.h>
> +#include <linux/interconnect.h>
> #include <linux/module.h>
> #include <linux/of_device.h>
> #include <linux/pm_runtime.h>
> @@ -618,6 +619,9 @@ static int tegra_plane_atomic_check(struct drm_plane *plane,
> struct tegra_dc *dc = to_tegra_dc(new_plane_state->crtc);
> int err;
>
> + plane_state->peak_memory_bandwidth = 0;
> + plane_state->avg_memory_bandwidth = 0;
> +
> /* no need for further checks if the plane is being disabled */
> if (!new_plane_state->crtc)
> return 0;
> @@ -808,6 +812,12 @@ static struct drm_plane *tegra_primary_plane_create(struct drm_device *drm,
> formats = dc->soc->primary_formats;
> modifiers = dc->soc->modifiers;
>
> + err = tegra_plane_interconnect_init(plane);
> + if (err) {
> + kfree(plane);
> + return ERR_PTR(err);
> + }
> +
> err = drm_universal_plane_init(drm, &plane->base, possible_crtcs,
> &tegra_plane_funcs, formats,
> num_formats, modifiers, type, NULL);
> @@ -841,9 +851,13 @@ static int tegra_cursor_atomic_check(struct drm_plane *plane,
> {
> struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state,
> plane);
> + struct tegra_plane_state *plane_state = to_tegra_plane_state(new_plane_state);
> struct tegra_plane *tegra = to_tegra_plane(plane);
> int err;
>
> + plane_state->peak_memory_bandwidth = 0;
> + plane_state->avg_memory_bandwidth = 0;
> +
> /* no need for further checks if the plane is being disabled */
> if (!new_plane_state->crtc)
> return 0;
> @@ -985,6 +999,12 @@ static struct drm_plane *tegra_dc_cursor_plane_create(struct drm_device *drm,
> num_formats = ARRAY_SIZE(tegra_cursor_plane_formats);
> formats = tegra_cursor_plane_formats;
>
> + err = tegra_plane_interconnect_init(plane);
> + if (err) {
> + kfree(plane);
> + return ERR_PTR(err);
> + }
> +
> err = drm_universal_plane_init(drm, &plane->base, possible_crtcs,
> &tegra_plane_funcs, formats,
> num_formats, NULL,
> @@ -1099,6 +1119,12 @@ static struct drm_plane *tegra_dc_overlay_plane_create(struct drm_device *drm,
> num_formats = dc->soc->num_overlay_formats;
> formats = dc->soc->overlay_formats;
>
> + err = tegra_plane_interconnect_init(plane);
> + if (err) {
> + kfree(plane);
> + return ERR_PTR(err);
> + }
> +
> if (!cursor)
> type = DRM_PLANE_TYPE_OVERLAY;
> else
> @@ -1216,6 +1242,7 @@ tegra_crtc_atomic_duplicate_state(struct drm_crtc *crtc)
> {
> struct tegra_dc_state *state = to_dc_state(crtc->state);
> struct tegra_dc_state *copy;
> + unsigned int i;
>
> copy = kmalloc(sizeof(*copy), GFP_KERNEL);
> if (!copy)
> @@ -1227,6 +1254,9 @@ tegra_crtc_atomic_duplicate_state(struct drm_crtc *crtc)
> copy->div = state->div;
> copy->planes = state->planes;
>
> + for (i = 0; i < ARRAY_SIZE(state->plane_peak_bw); i++)
> + copy->plane_peak_bw[i] = state->plane_peak_bw[i];
> +
> return &copy->base;
> }
>
> @@ -1753,6 +1783,106 @@ static int tegra_dc_wait_idle(struct tegra_dc *dc, unsigned long timeout)
> return -ETIMEDOUT;
> }
>
> +static void
> +tegra_crtc_update_memory_bandwidth(struct drm_crtc *crtc,
> + struct drm_atomic_state *state,
> + bool prepare_bandwidth_transition)
> +{
> + const struct tegra_plane_state *old_tegra_state, *new_tegra_state;
> + const struct tegra_dc_state *old_dc_state, *new_dc_state;
> + u32 i, new_avg_bw, old_avg_bw, new_peak_bw, old_peak_bw;
> + const struct drm_plane_state *old_plane_state;
> + const struct drm_crtc_state *old_crtc_state;
> + struct tegra_dc_window window, old_window;
> + struct tegra_dc *dc = to_tegra_dc(crtc);
> + struct tegra_plane *tegra;
> + struct drm_plane *plane;
> +
> + if (dc->soc->has_nvdisplay)
> + return;
> +
> + old_crtc_state = drm_atomic_get_old_crtc_state(state, crtc);
> + old_dc_state = to_const_dc_state(old_crtc_state);
> + new_dc_state = to_const_dc_state(crtc->state);
> +
> + if (!crtc->state->active) {
> + if (!old_crtc_state->active)
> + return;
> +
> + /*
> + * When CRTC is disabled on DPMS, the state of attached planes
> + * is kept unchanged. Hence we need to enforce removal of the
> + * bandwidths from the ICC paths.
> + */
> + drm_atomic_crtc_for_each_plane(plane, crtc) {
> + tegra = to_tegra_plane(plane);
> +
> + icc_set_bw(tegra->icc_mem, 0, 0);
> + icc_set_bw(tegra->icc_mem_vfilter, 0, 0);
> + }
> +
> + return;
> + }
> +
> + for_each_old_plane_in_state(old_crtc_state->state, plane,
> + old_plane_state, i) {
> + old_tegra_state = to_const_tegra_plane_state(old_plane_state);
> + new_tegra_state = to_const_tegra_plane_state(plane->state);
> + tegra = to_tegra_plane(plane);
> +
> + /*
> + * We're iterating over the global atomic state and it contains
> + * planes from another CRTC, hence we need to filter out the
> + * planes unrelated to this CRTC.
> + */
> + if (tegra->dc != dc)
> + continue;
> +
> + new_avg_bw = new_tegra_state->avg_memory_bandwidth;
> + old_avg_bw = old_tegra_state->avg_memory_bandwidth;
> +
> + new_peak_bw = new_dc_state->plane_peak_bw[tegra->index];
> + old_peak_bw = old_dc_state->plane_peak_bw[tegra->index];
> +
> + /*
> + * See the comment related to !crtc->state->active above,
> + * which explains why bandwidths need to be updated when
> + * CRTC is turning ON.
> + */
> + if (new_avg_bw == old_avg_bw && new_peak_bw == old_peak_bw &&
> + old_crtc_state->active)
> + continue;
> +
> + window.src.h = drm_rect_height(&plane->state->src) >> 16;
> + window.dst.h = drm_rect_height(&plane->state->dst);
> +
> + old_window.src.h = drm_rect_height(&old_plane_state->src) >> 16;
> + old_window.dst.h = drm_rect_height(&old_plane_state->dst);
> +
> + /*
> + * During the preparation phase (atomic_begin), the memory
> + * freq should go high before the DC changes are committed
> + * if bandwidth requirement goes up, otherwise memory freq
> + * should stay high if BW requirement goes down. The
> + * opposite applies to the completion phase (post_commit).
> + */
> + if (prepare_bandwidth_transition) {
> + new_avg_bw = max(old_avg_bw, new_avg_bw);
> + new_peak_bw = max(old_peak_bw, new_peak_bw);
> +
> + if (tegra_plane_use_vertical_filtering(tegra, &old_window))
> + window = old_window;
> + }
> +
> + icc_set_bw(tegra->icc_mem, new_avg_bw, new_peak_bw);
> +
> + if (tegra_plane_use_vertical_filtering(tegra, &window))
> + icc_set_bw(tegra->icc_mem_vfilter, new_avg_bw, new_peak_bw);
> + else
> + icc_set_bw(tegra->icc_mem_vfilter, 0, 0);
> + }
> +}
> +
> static void tegra_crtc_atomic_disable(struct drm_crtc *crtc,
> struct drm_atomic_state *state)
> {
> @@ -1934,6 +2064,8 @@ static void tegra_crtc_atomic_begin(struct drm_crtc *crtc,
> {
> unsigned long flags;
>
> + tegra_crtc_update_memory_bandwidth(crtc, state, true);
> +
> if (crtc->state->event) {
> spin_lock_irqsave(&crtc->dev->event_lock, flags);
>
> @@ -1966,7 +2098,215 @@ static void tegra_crtc_atomic_flush(struct drm_crtc *crtc,
> value = tegra_dc_readl(dc, DC_CMD_STATE_CONTROL);
> }
>
> +static bool tegra_plane_is_cursor(const struct drm_plane_state *state)
> +{
> + const struct tegra_dc_soc_info *soc = to_tegra_dc(state->crtc)->soc;
> + const struct drm_format_info *fmt = state->fb->format;
> + unsigned int src_w = drm_rect_width(&state->src) >> 16;
> + unsigned int dst_w = drm_rect_width(&state->dst);
> +
> + if (state->plane->type != DRM_PLANE_TYPE_CURSOR)
> + return false;
> +
> + if (soc->supports_cursor)
> + return true;
> +
> + if (src_w != dst_w || fmt->num_planes != 1 || src_w * fmt->cpp[0] > 256)
> + return false;
> +
> + return true;
> +}
> +
> +static unsigned long
> +tegra_plane_overlap_mask(struct drm_crtc_state *state,
> + const struct drm_plane_state *plane_state)
> +{
> + const struct drm_plane_state *other_state;
> + const struct tegra_plane *tegra;
> + unsigned long overlap_mask = 0;
> + struct drm_plane *plane;
> + struct drm_rect rect;
> +
> + if (!plane_state->visible || !plane_state->fb)
> + return 0;
> +
> + /*
> + * Data-prefetch FIFO will easily help to overcome temporal memory
> + * pressure if other plane overlaps with the cursor plane.
> + */
> + if (tegra_plane_is_cursor(plane_state))
> + return 0;
> +
> + drm_atomic_crtc_state_for_each_plane_state(plane, other_state, state) {
> + rect = plane_state->dst;
> +
> + tegra = to_tegra_plane(other_state->plane);
> +
> + if (!other_state->visible || !other_state->fb)
> + continue;
> +
> + /*
> + * Ignore cursor plane overlaps because it's not practical to
> + * assume that it contributes to the bandwidth in overlapping
> + * area if window width is small.
> + */
> + if (tegra_plane_is_cursor(other_state))
> + continue;
> +
> + if (drm_rect_intersect(&rect, &other_state->dst))
> + overlap_mask |= BIT(tegra->index);
> + }
> +
> + return overlap_mask;
> +}
> +
> +static struct drm_plane *
> +tegra_crtc_get_plane_by_index(struct drm_crtc *crtc, unsigned int index)
> +{
> + struct drm_plane *plane;
> +
> + drm_atomic_crtc_for_each_plane(plane, crtc) {
> + if (to_tegra_plane(plane)->index == index)
> + return plane;
> + }
> +
> + return NULL;
> +}
> +
> +static int tegra_crtc_calculate_memory_bandwidth(struct drm_crtc *crtc,
> + struct drm_atomic_state *state)
> +{
> + ulong overlap_mask[TEGRA_DC_LEGACY_PLANES_NUM] = {}, mask;
> + u32 plane_peak_bw[TEGRA_DC_LEGACY_PLANES_NUM] = {};
> + bool all_planes_overlap_simultaneously = true;
> + const struct tegra_plane_state *tegra_state;
> + const struct drm_plane_state *plane_state;
> + const struct tegra_dc_state *old_dc_state;
> + struct tegra_dc *dc = to_tegra_dc(crtc);
> + const struct drm_crtc_state *old_state;
> + struct tegra_dc_state *new_dc_state;
> + struct drm_crtc_state *new_state;
> + struct tegra_plane *tegra;
> + struct drm_plane *plane;
> + u32 i, k, overlap_bw;
> +
> + /*
> + * The nv-display uses shared planes. The algorithm below assumes
> + * maximum 3 planes per-CRTC, this assumption isn't applicable to
> + * the nv-display. Note that T124 support has additional windows,
> + * but currently they aren't supported by the driver.
> + */
> + if (dc->soc->has_nvdisplay)
> + return 0;
> +
> + new_state = drm_atomic_get_new_crtc_state(state, crtc);
> + new_dc_state = to_dc_state(new_state);
> +
> + /*
> + * For overlapping planes pixel's data is fetched for each plane at
> + * the same time, hence bandwidths are accumulated in this case.
> + * This needs to be taken into account for calculating total bandwidth
> + * consumed by all planes.
> + *
> + * Here we get the overlapping state of each plane, which is a
> + * bitmask of plane indices telling with what planes there is an
> + * overlap. Note that bitmask[plane] includes BIT(plane) in order
> + * to make further code nicer and simpler.
> + */
> + drm_atomic_crtc_state_for_each_plane_state(plane, plane_state, new_state) {
> + tegra_state = to_const_tegra_plane_state(plane_state);
> + tegra = to_tegra_plane(plane);
> +
> + if (WARN_ON_ONCE(tegra->index >= TEGRA_DC_LEGACY_PLANES_NUM))
> + return -EINVAL;
> +
> + plane_peak_bw[tegra->index] = tegra_state->peak_memory_bandwidth;
> + mask = tegra_plane_overlap_mask(new_state, plane_state);
> + overlap_mask[tegra->index] = mask;
> +
> + if (hweight_long(mask) != 3)
> + all_planes_overlap_simultaneously = false;
> + }
> +
> + old_state = drm_atomic_get_old_crtc_state(state, crtc);
> + old_dc_state = to_const_dc_state(old_state);
> +
> + /*
> + * Then we calculate maximum bandwidth of each plane state.
> + * The bandwidth includes the plane BW + BW of the "simultaneously"
> + * overlapping planes, where "simultaneously" means areas where DC
> + * fetches from the planes simultaneously during the scan-out process.
> + *
> + * For example, if plane A overlaps with planes B and C, but B and C
> + * don't overlap, then the peak bandwidth will be either in area where
> + * A-and-B or A-and-C planes overlap.
> + *
> + * The plane_peak_bw[] contains peak memory bandwidth values of
> + * each plane, this information is needed by interconnect provider
> + * in order to set up latency allowance based on the peak BW, see
> + * tegra_crtc_update_memory_bandwidth().
> + */
> + for (i = 0; i < ARRAY_SIZE(plane_peak_bw); i++) {
> + overlap_bw = 0;
> +
> + for_each_set_bit(k, &overlap_mask[i], 3) {
> + if (k == i)
> + continue;
> +
> + if (all_planes_overlap_simultaneously)
> + overlap_bw += plane_peak_bw[k];
> + else
> + overlap_bw = max(overlap_bw, plane_peak_bw[k]);
> + }
> +
> + new_dc_state->plane_peak_bw[i] = plane_peak_bw[i] + overlap_bw;
> +
> + /*
> + * If plane's peak bandwidth changed (for example plane isn't
> + * overlapped anymore) and plane isn't in the atomic state,
> + * then add plane to the state in order to have the bandwidth
> + * updated.
> + */
> + if (old_dc_state->plane_peak_bw[i] !=
> + new_dc_state->plane_peak_bw[i]) {
> + plane = tegra_crtc_get_plane_by_index(crtc, i);
> + if (!plane)
> + continue;
> +
> + plane_state = drm_atomic_get_plane_state(state, plane);
> + if (IS_ERR(plane_state))
> + return PTR_ERR(plane_state);
> + }
> + }
> +
> + return 0;
> +}
> +
> +static int tegra_crtc_atomic_check(struct drm_crtc *crtc,
> + struct drm_atomic_state *state)
> +{
> + int err;
> +
> + err = tegra_crtc_calculate_memory_bandwidth(crtc, state);
> + if (err)
> + return err;
> +
> + return 0;
> +}
> +
> +void tegra_crtc_atomic_post_commit(struct drm_crtc *crtc,
> + struct drm_atomic_state *state)
> +{
> + /*
> + * Display bandwidth is allowed to go down only once hardware state
> + * is known to be armed, i.e. state was committed and VBLANK event
> + * received.
> + */
> + tegra_crtc_update_memory_bandwidth(crtc, state, false);
> +}
> +
> static const struct drm_crtc_helper_funcs tegra_crtc_helper_funcs = {
> + .atomic_check = tegra_crtc_atomic_check,
> .atomic_begin = tegra_crtc_atomic_begin,
> .atomic_flush = tegra_crtc_atomic_flush,
> .atomic_enable = tegra_crtc_atomic_enable,
> @@ -2257,7 +2597,9 @@ static const struct tegra_dc_soc_info tegra20_dc_soc_info = {
> .overlay_formats = tegra20_overlay_formats,
> .modifiers = tegra20_modifiers,
> .has_win_a_without_filters = true,
> + .has_win_b_vfilter_mem_client = true,
> .has_win_c_without_vert_filter = true,
> + .plane_tiled_memory_bandwidth_x2 = false,
> };
>
> static const struct tegra_dc_soc_info tegra30_dc_soc_info = {
> @@ -2276,7 +2618,9 @@ static const struct tegra_dc_soc_info tegra30_dc_soc_info = {
> .overlay_formats = tegra20_overlay_formats,
> .modifiers = tegra20_modifiers,
> .has_win_a_without_filters = false,
> + .has_win_b_vfilter_mem_client = true,
> .has_win_c_without_vert_filter = false,
> + .plane_tiled_memory_bandwidth_x2 = true,
> };
>
> static const struct tegra_dc_soc_info tegra114_dc_soc_info = {
> @@ -2295,7 +2639,9 @@ static const struct tegra_dc_soc_info tegra114_dc_soc_info = {
> .overlay_formats = tegra114_overlay_formats,
> .modifiers = tegra20_modifiers,
> .has_win_a_without_filters = false,
> + .has_win_b_vfilter_mem_client = false,
> .has_win_c_without_vert_filter = false,
> + .plane_tiled_memory_bandwidth_x2 = true,
> };
>
> static const struct tegra_dc_soc_info tegra124_dc_soc_info = {
> @@ -2314,7 +2660,9 @@ static const struct tegra_dc_soc_info tegra124_dc_soc_info = {
> .overlay_formats = tegra124_overlay_formats,
> .modifiers = tegra124_modifiers,
> .has_win_a_without_filters = false,
> + .has_win_b_vfilter_mem_client = false,
> .has_win_c_without_vert_filter = false,
> + .plane_tiled_memory_bandwidth_x2 = false,
> };
>
> static const struct tegra_dc_soc_info tegra210_dc_soc_info = {
> @@ -2333,7 +2681,9 @@ static const struct tegra_dc_soc_info tegra210_dc_soc_info = {
> .overlay_formats = tegra114_overlay_formats,
> .modifiers = tegra124_modifiers,
> .has_win_a_without_filters = false,
> + .has_win_b_vfilter_mem_client = false,
> .has_win_c_without_vert_filter = false,
> + .plane_tiled_memory_bandwidth_x2 = false,
> };
>
> static const struct tegra_windowgroup_soc tegra186_dc_wgrps[] = {
> @@ -2382,6 +2732,7 @@ static const struct tegra_dc_soc_info tegra186_dc_soc_info = {
> .has_nvdisplay = true,
> .wgrps = tegra186_dc_wgrps,
> .num_wgrps = ARRAY_SIZE(tegra186_dc_wgrps),
> + .plane_tiled_memory_bandwidth_x2 = false,
> };
>
> static const struct tegra_windowgroup_soc tegra194_dc_wgrps[] = {
> @@ -2430,6 +2781,7 @@ static const struct tegra_dc_soc_info tegra194_dc_soc_info = {
> .has_nvdisplay = true,
> .wgrps = tegra194_dc_wgrps,
> .num_wgrps = ARRAY_SIZE(tegra194_dc_wgrps),
> + .plane_tiled_memory_bandwidth_x2 = false,
> };
For globals, any field you leave out is already zero-initialized, so you get
.x = false by default; I'm not sure those explicit entries add much value.
Reviewed-by: Michał Mirosław <mirq-linux@...e.qmqm.pl>
Powered by blists - more mailing lists