lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <cf4fb10d-f0f0-3cd2-3ccb-1f1cb0594295@amd.com>
Date:   Thu, 21 Jul 2022 14:58:41 -0400
From:   Rodrigo Siqueira Jordao <Rodrigo.Siqueira@....com>
To:     Melissa Wen <mwen@...lia.com>, harry.wentland@....com,
        sunpeng.li@....com, alexander.deucher@....com,
        christian.koenig@....com, Xinhui.Pan@....com, airlied@...ux.ie,
        daniel@...ll.ch
Cc:     Guenter Roeck <linux@...ck-us.net>,
        Maíra Canal <mairacanal@...eup.net>,
        kernel-dev@...lia.com, amd-gfx@...ts.freedesktop.org,
        dri-devel@...ts.freedesktop.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH 4/5] drm/amd/display: move FPU code from dcn30 clk mgr to
 DML folder



On 2022-07-20 15:32, Melissa Wen wrote:
> The -mno-gnu-attribute option in clk mgr makefile for dcn30 hides a soft
> vs hard fp error for powerpc. After removing this flag, we can see some
> FPU code remains there:
> 
> gcc-11.3.0-nolibc/powerpc64-linux/bin/powerpc64-linux-ld:
> drivers/gpu/drm/amd/amdgpu/../display/dc/dml/display_mode_lib.o uses
> hard float,
> drivers/gpu/drm/amd/amdgpu/../display/dc/clk_mgr/dcn30/dcn30_clk_mgr.o
> uses soft float
> 
> Therefore, remove the -mno-gnu-attribute flag for dcn30/powerpc and move
> FPU-associated code to DML folder.
> 
> Signed-off-by: Melissa Wen <mwen@...lia.com>
> ---
>   .../gpu/drm/amd/display/dc/clk_mgr/Makefile   |  6 --
>   .../display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c  | 63 ++-----------------
>   .../drm/amd/display/dc/dml/dcn30/dcn30_fpu.c  | 63 ++++++++++++++++++-
>   .../drm/amd/display/dc/dml/dcn30/dcn30_fpu.h  |  1 +
>   4 files changed, 68 insertions(+), 65 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile
> index 66dc02c426e9..15b660a951a5 100644
> --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile
> +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile
> @@ -115,12 +115,6 @@ AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN21)
>   ###############################################################################
>   CLK_MGR_DCN30 = dcn30_clk_mgr.o dcn30_clk_mgr_smu_msg.o
>   
> -# prevent build errors regarding soft-float vs hard-float FP ABI tags
> -# this code is currently unused on ppc64, as it applies to VanGogh APUs only
> -ifdef CONFIG_PPC64
> -CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn30/dcn30_clk_mgr.o := $(call cc-option,-mno-gnu-attribute)
> -endif
> -
>   AMD_DAL_CLK_MGR_DCN30 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn30/,$(CLK_MGR_DCN30))
>   
>   AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN30)
> diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c
> index 914708cefc79..3ce0ee0d012f 100644
> --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c
> +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c
> @@ -29,6 +29,7 @@
>   #include "dcn20/dcn20_clk_mgr.h"
>   #include "dce100/dce_clk_mgr.h"
>   #include "dcn30/dcn30_clk_mgr.h"
> +#include "dml/dcn30/dcn30_fpu.h"
>   #include "reg_helper.h"
>   #include "core_types.h"
>   #include "dm_helpers.h"
> @@ -97,65 +98,11 @@ static void dcn3_init_single_clock(struct clk_mgr_internal *clk_mgr, uint32_t cl
>   	}
>   }
>   
> -static noinline void dcn3_build_wm_range_table(struct clk_mgr_internal *clk_mgr)
> +static void dcn3_build_wm_range_table(struct clk_mgr_internal *clk_mgr)
>   {
> -	/* defaults */
> -	double pstate_latency_us = clk_mgr->base.ctx->dc->dml.soc.dram_clock_change_latency_us;
> -	double sr_exit_time_us = clk_mgr->base.ctx->dc->dml.soc.sr_exit_time_us;
> -	double sr_enter_plus_exit_time_us = clk_mgr->base.ctx->dc->dml.soc.sr_enter_plus_exit_time_us;
> -	uint16_t min_uclk_mhz = clk_mgr->base.bw_params->clk_table.entries[0].memclk_mhz;
> -
> -	/* Set A - Normal - default values*/
> -	clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].valid = true;
> -	clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us = pstate_latency_us;
> -	clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].dml_input.sr_exit_time_us = sr_exit_time_us;
> -	clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us;
> -	clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.wm_type = WATERMARKS_CLOCK_RANGE;
> -	clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.min_dcfclk = 0;
> -	clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.max_dcfclk = 0xFFFF;
> -	clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.min_uclk = min_uclk_mhz;
> -	clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.max_uclk = 0xFFFF;
> -
> -	/* Set B - Performance - higher minimum clocks */
> -//	clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].valid = true;
> -//	clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].dml_input.pstate_latency_us = pstate_latency_us;
> -//	clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].dml_input.sr_exit_time_us = sr_exit_time_us;
> -//	clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us;
> -//	clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.wm_type = WATERMARKS_CLOCK_RANGE;
> -//	clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.min_dcfclk = TUNED VALUE;
> -//	clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.max_dcfclk = 0xFFFF;
> -//	clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.min_uclk = TUNED VALUE;
> -//	clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.max_uclk = 0xFFFF;
> -
> -	/* Set C - Dummy P-State - P-State latency set to "dummy p-state" value */
> -	clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].valid = true;
> -	clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.pstate_latency_us = 0;
> -	clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us = sr_exit_time_us;
> -	clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us;
> -	clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.wm_type = WATERMARKS_DUMMY_PSTATE;
> -	clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.min_dcfclk = 0;
> -	clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.max_dcfclk = 0xFFFF;
> -	clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.min_uclk = min_uclk_mhz;
> -	clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.max_uclk = 0xFFFF;
> -	clk_mgr->base.bw_params->dummy_pstate_table[0].dram_speed_mts = 1600;
> -	clk_mgr->base.bw_params->dummy_pstate_table[0].dummy_pstate_latency_us = 38;
> -	clk_mgr->base.bw_params->dummy_pstate_table[1].dram_speed_mts = 8000;
> -	clk_mgr->base.bw_params->dummy_pstate_table[1].dummy_pstate_latency_us = 9;
> -	clk_mgr->base.bw_params->dummy_pstate_table[2].dram_speed_mts = 10000;
> -	clk_mgr->base.bw_params->dummy_pstate_table[2].dummy_pstate_latency_us = 8;
> -	clk_mgr->base.bw_params->dummy_pstate_table[3].dram_speed_mts = 16000;
> -	clk_mgr->base.bw_params->dummy_pstate_table[3].dummy_pstate_latency_us = 5;
> -
> -	/* Set D - MALL - SR enter and exit times adjusted for MALL */
> -	clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].valid = true;
> -	clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].dml_input.pstate_latency_us = pstate_latency_us;
> -	clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].dml_input.sr_exit_time_us = 2;
> -	clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].dml_input.sr_enter_plus_exit_time_us = 4;
> -	clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.wm_type = WATERMARKS_MALL;
> -	clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.min_dcfclk = 0;
> -	clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.max_dcfclk = 0xFFFF;
> -	clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.min_uclk = min_uclk_mhz;
> -	clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.max_uclk = 0xFFFF;
> +	DC_FP_START();
> +	dcn3_fpu_build_wm_range_table(&clk_mgr->base);
> +	DC_FP_END();
>   }
>   
>   void dcn3_init_clocks(struct clk_mgr *clk_mgr_base)
> diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c
> index a8db1306750e..c00f759fdded 100644
> --- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c
> +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c
> @@ -29,7 +29,7 @@
>   #include "dcn20/dcn20_resource.h"
>   #include "dcn30/dcn30_resource.h"
>   
> -
> +#include "clk_mgr/dcn30/dcn30_smu11_driver_if.h"
>   #include "display_mode_vba_30.h"
>   #include "dcn30_fpu.h"
>   
> @@ -616,4 +616,65 @@ void dcn30_fpu_update_bw_bounding_box(struct dc *dc,
>   
>   }
>   
> +void dcn3_fpu_build_wm_range_table(struct clk_mgr *base)
> +{
> +	/* defaults */
> +	double pstate_latency_us = base->ctx->dc->dml.soc.dram_clock_change_latency_us;
> +	double sr_exit_time_us = base->ctx->dc->dml.soc.sr_exit_time_us;
> +	double sr_enter_plus_exit_time_us = base->ctx->dc->dml.soc.sr_enter_plus_exit_time_us;
> +	uint16_t min_uclk_mhz = base->bw_params->clk_table.entries[0].memclk_mhz;
>   
> +	dc_assert_fp_enabled();
> +
> +	/* Set A - Normal - default values*/
> +	base->bw_params->wm_table.nv_entries[WM_A].valid = true;
> +	base->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us = pstate_latency_us;
> +	base->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_exit_time_us = sr_exit_time_us;
> +	base->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us;
> +	base->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.wm_type = WATERMARKS_CLOCK_RANGE;
> +	base->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.min_dcfclk = 0;
> +	base->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.max_dcfclk = 0xFFFF;
> +	base->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.min_uclk = min_uclk_mhz;
> +	base->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.max_uclk = 0xFFFF;
> +
> +	/* Set B - Performance - higher minimum clocks */
> +//	base->bw_params->wm_table.nv_entries[WM_B].valid = true;
> +//	base->bw_params->wm_table.nv_entries[WM_B].dml_input.pstate_latency_us = pstate_latency_us;
> +//	base->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_exit_time_us = sr_exit_time_us;
> +//	base->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us;
> +//	base->bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.wm_type = WATERMARKS_CLOCK_RANGE;
> +//	base->bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.min_dcfclk = TUNED VALUE;
> +//	base->bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.max_dcfclk = 0xFFFF;
> +//	base->bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.min_uclk = TUNED VALUE;
> +//	base->bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.max_uclk = 0xFFFF;
> +
> +	/* Set C - Dummy P-State - P-State latency set to "dummy p-state" value */
> +	base->bw_params->wm_table.nv_entries[WM_C].valid = true;
> +	base->bw_params->wm_table.nv_entries[WM_C].dml_input.pstate_latency_us = 0;
> +	base->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us = sr_exit_time_us;
> +	base->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us;
> +	base->bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.wm_type = WATERMARKS_DUMMY_PSTATE;
> +	base->bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.min_dcfclk = 0;
> +	base->bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.max_dcfclk = 0xFFFF;
> +	base->bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.min_uclk = min_uclk_mhz;
> +	base->bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.max_uclk = 0xFFFF;
> +	base->bw_params->dummy_pstate_table[0].dram_speed_mts = 1600;
> +	base->bw_params->dummy_pstate_table[0].dummy_pstate_latency_us = 38;
> +	base->bw_params->dummy_pstate_table[1].dram_speed_mts = 8000;
> +	base->bw_params->dummy_pstate_table[1].dummy_pstate_latency_us = 9;
> +	base->bw_params->dummy_pstate_table[2].dram_speed_mts = 10000;
> +	base->bw_params->dummy_pstate_table[2].dummy_pstate_latency_us = 8;
> +	base->bw_params->dummy_pstate_table[3].dram_speed_mts = 16000;
> +	base->bw_params->dummy_pstate_table[3].dummy_pstate_latency_us = 5;
> +
> +	/* Set D - MALL - SR enter and exit times adjusted for MALL */
> +	base->bw_params->wm_table.nv_entries[WM_D].valid = true;
> +	base->bw_params->wm_table.nv_entries[WM_D].dml_input.pstate_latency_us = pstate_latency_us;
> +	base->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_exit_time_us = 2;
> +	base->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_enter_plus_exit_time_us = 4;
> +	base->bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.wm_type = WATERMARKS_MALL;
> +	base->bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.min_dcfclk = 0;
> +	base->bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.max_dcfclk = 0xFFFF;
> +	base->bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.min_uclk = min_uclk_mhz;
> +	base->bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.max_uclk = 0xFFFF;
> +}
> diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.h
> index dedfe7b5f173..c2024052a497 100644
> --- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.h
> +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.h
> @@ -63,5 +63,6 @@ void dcn30_fpu_update_bw_bounding_box(struct dc *dc,
>   	unsigned int *dcfclk_mhz,
>   	unsigned int *dram_speed_mts);
>   
> +void dcn3_fpu_build_wm_range_table(struct clk_mgr *base);
>   
>   #endif /* __DCN30_FPU_H__*/

Reviewed-by: Rodrigo Siqueira <Rodrigo.Siqueira@....com>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ