lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Date:   Tue, 14 Sep 2021 17:05:08 -0400
From:   Harry Wentland <harry.wentland@....com>
To:     <amd-gfx@...ts.freedesktop.org>
CC:     <ndesaulniers@...gle.com>, <torvalds@...ux-foundation.org>,
        <linux-kernel@...r.kernel.org>, <arnd@...nel.org>,
        <sunpeng.li@....com>, <alexander.deucher@....com>,
        <christian.koenig@....com>, <Xinhui.Pan@....com>,
        <nathan@...nel.org>, <linux@...ck-us.net>, <llvm@...ts.linux.dev>,
        Harry Wentland <harry.wentland@....com>
Subject: [PATCH] drm/amd/display: Reduce stack size for dml21_ModeSupportAndSystemConfigurationFull

[Why & How]
With Werror enabled in the kernel we were failing the clang build since
dml21_ModeSupportAndSystemConfigurationFull's stack frame is 1064 when
building with clang, and exceeding the default 1024 stack frame limit.

The culprit seems to be the Pipe struct, so pull the relevant block
out into its own sub-function.

Signed-off-by: Harry Wentland <harry.wentland@....com>
Fixes: 3fe617ccafd6 ("Enable '-Werror' by default for all kernel builds")
Cc: Nick Desaulniers <ndesaulniers@...gle.com>
Cc: Linus Torvalds <torvalds@...ux-foundation.org>
Cc: amd-gfx@...ts.freedesktop.org
Cc: Linux Kernel Mailing List <linux-kernel@...r.kernel.org>
Cc: Arnd Bergmann <arnd@...nel.org>
Cc: Leo Li <sunpeng.li@....com>
Cc: Alex Deucher <alexander.deucher@....com>
Cc: Christian König <christian.koenig@....com>
Cc: Xinhui Pan <Xinhui.Pan@....com>
Cc: Nathan Chancellor <nathan@...nel.org>
Cc: Guenter Roeck <linux@...ck-us.net>
Cc: llvm@...ts.linux.dev
---
 .../dc/dml/dcn21/display_mode_vba_21.c        | 236 +++++++++---------
 1 file changed, 123 insertions(+), 113 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c
index 4136eb8256cb..8a7485e21d53 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c
@@ -3394,6 +3394,127 @@ static unsigned int TruncToValidBPP(
 	}
 }
 
+
+static noinline void CalculatePrefetchSchedulePerPlane(
+		struct display_mode_lib *mode_lib,
+		int i,
+		unsigned j,
+		unsigned k)
+{
+	struct vba_vars_st *locals = &mode_lib->vba;
+	Pipe myPipe;
+	HostVM myHostVM;
+
+	if (mode_lib->vba.XFCEnabled[k] == true) {
+		mode_lib->vba.XFCRemoteSurfaceFlipDelay =
+				CalculateRemoteSurfaceFlipDelay(
+						mode_lib,
+						mode_lib->vba.VRatio[k],
+						locals->SwathWidthYThisState[k],
+						dml_ceil(locals->BytePerPixelInDETY[k], 1.0),
+						mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k],
+						mode_lib->vba.XFCTSlvVupdateOffset,
+						mode_lib->vba.XFCTSlvVupdateWidth,
+						mode_lib->vba.XFCTSlvVreadyOffset,
+						mode_lib->vba.XFCXBUFLatencyTolerance,
+						mode_lib->vba.XFCFillBWOverhead,
+						mode_lib->vba.XFCSlvChunkSize,
+						mode_lib->vba.XFCBusTransportTime,
+						mode_lib->vba.TimeCalc,
+						mode_lib->vba.TWait,
+						&mode_lib->vba.SrcActiveDrainRate,
+						&mode_lib->vba.TInitXFill,
+						&mode_lib->vba.TslvChk);
+	} else {
+		mode_lib->vba.XFCRemoteSurfaceFlipDelay = 0.0;
+	}
+
+	myPipe.DPPCLK = locals->RequiredDPPCLK[i][j][k];
+	myPipe.DISPCLK = locals->RequiredDISPCLK[i][j];
+	myPipe.PixelClock = mode_lib->vba.PixelClock[k];
+	myPipe.DCFCLKDeepSleep = mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0];
+	myPipe.DPPPerPlane = locals->NoOfDPP[i][j][k];
+	myPipe.ScalerEnabled = mode_lib->vba.ScalerEnabled[k];
+	myPipe.SourceScan = mode_lib->vba.SourceScan[k];
+	myPipe.BlockWidth256BytesY = locals->Read256BlockWidthY[k];
+	myPipe.BlockHeight256BytesY = locals->Read256BlockHeightY[k];
+	myPipe.BlockWidth256BytesC = locals->Read256BlockWidthC[k];
+	myPipe.BlockHeight256BytesC = locals->Read256BlockHeightC[k];
+	myPipe.InterlaceEnable = mode_lib->vba.Interlace[k];
+	myPipe.NumberOfCursors = mode_lib->vba.NumberOfCursors[k];
+	myPipe.VBlank = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k];
+	myPipe.HTotal = mode_lib->vba.HTotal[k];
+
+
+	myHostVM.Enable = mode_lib->vba.HostVMEnable;
+	myHostVM.MaxPageTableLevels = mode_lib->vba.HostVMMaxPageTableLevels;
+	myHostVM.CachedPageTableLevels = mode_lib->vba.HostVMCachedPageTableLevels;
+
+
+	mode_lib->vba.IsErrorResult[i][j][k] = CalculatePrefetchSchedule(
+			mode_lib,
+			mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
+			mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
+			&myPipe,
+			locals->DSCDelayPerState[i][k],
+			mode_lib->vba.DPPCLKDelaySubtotal,
+			mode_lib->vba.DPPCLKDelaySCL,
+			mode_lib->vba.DPPCLKDelaySCLLBOnly,
+			mode_lib->vba.DPPCLKDelayCNVCFormater,
+			mode_lib->vba.DPPCLKDelayCNVCCursor,
+			mode_lib->vba.DISPCLKDelaySubtotal,
+			locals->SwathWidthYThisState[k] / mode_lib->vba.HRatio[k],
+			mode_lib->vba.OutputFormat[k],
+			mode_lib->vba.MaxInterDCNTileRepeaters,
+			dml_min(mode_lib->vba.MaxVStartup, locals->MaximumVStartup[0][0][k]),
+			locals->MaximumVStartup[0][0][k],
+			mode_lib->vba.GPUVMMaxPageTableLevels,
+			mode_lib->vba.GPUVMEnable,
+			&myHostVM,
+			mode_lib->vba.DynamicMetadataEnable[k],
+			mode_lib->vba.DynamicMetadataLinesBeforeActiveRequired[k],
+			mode_lib->vba.DynamicMetadataTransmittedBytes[k],
+			mode_lib->vba.DCCEnable[k],
+			mode_lib->vba.UrgentLatency,
+			mode_lib->vba.ExtraLatency,
+			mode_lib->vba.TimeCalc,
+			locals->PDEAndMetaPTEBytesPerFrame[0][0][k],
+			locals->MetaRowBytes[0][0][k],
+			locals->DPTEBytesPerRow[0][0][k],
+			locals->PrefetchLinesY[0][0][k],
+			locals->SwathWidthYThisState[k],
+			locals->BytePerPixelInDETY[k],
+			locals->PrefillY[k],
+			locals->MaxNumSwY[k],
+			locals->PrefetchLinesC[0][0][k],
+			locals->BytePerPixelInDETC[k],
+			locals->PrefillC[k],
+			locals->MaxNumSwC[k],
+			locals->SwathHeightYThisState[k],
+			locals->SwathHeightCThisState[k],
+			mode_lib->vba.TWait,
+			mode_lib->vba.XFCEnabled[k],
+			mode_lib->vba.XFCRemoteSurfaceFlipDelay,
+			mode_lib->vba.ProgressiveToInterlaceUnitInOPP,
+			&locals->dst_x_after_scaler,
+			&locals->dst_y_after_scaler,
+			&locals->LineTimesForPrefetch[k],
+			&locals->PrefetchBW[k],
+			&locals->LinesForMetaPTE[k],
+			&locals->LinesForMetaAndDPTERow[k],
+			&locals->VRatioPreY[i][j][k],
+			&locals->VRatioPreC[i][j][k],
+			&locals->RequiredPrefetchPixelDataBWLuma[i][j][k],
+			&locals->RequiredPrefetchPixelDataBWChroma[i][j][k],
+			&locals->VStartupRequiredWhenNotEnoughTimeForDynamicMetadata,
+			&locals->Tno_bw[k],
+			&locals->prefetch_vmrow_bw[k],
+			locals->swath_width_luma_ub,
+			locals->swath_width_chroma_ub,
+			&mode_lib->vba.VUpdateOffsetPix[k],
+			&mode_lib->vba.VUpdateWidthPix[k],
+			&mode_lib->vba.VReadyOffsetPix[k]);
+}
 void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib)
 {
 	struct vba_vars_st *locals = &mode_lib->vba;
@@ -4676,120 +4797,9 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 						mode_lib->vba.DRAMClockChangeLatency,
 						mode_lib->vba.UrgentLatency,
 						mode_lib->vba.SREnterPlusExitTime);
-				for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
-					Pipe myPipe;
-					HostVM myHostVM;
-
-					if (mode_lib->vba.XFCEnabled[k] == true) {
-						mode_lib->vba.XFCRemoteSurfaceFlipDelay =
-								CalculateRemoteSurfaceFlipDelay(
-										mode_lib,
-										mode_lib->vba.VRatio[k],
-										locals->SwathWidthYThisState[k],
-										dml_ceil(locals->BytePerPixelInDETY[k], 1.0),
-										mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k],
-										mode_lib->vba.XFCTSlvVupdateOffset,
-										mode_lib->vba.XFCTSlvVupdateWidth,
-										mode_lib->vba.XFCTSlvVreadyOffset,
-										mode_lib->vba.XFCXBUFLatencyTolerance,
-										mode_lib->vba.XFCFillBWOverhead,
-										mode_lib->vba.XFCSlvChunkSize,
-										mode_lib->vba.XFCBusTransportTime,
-										mode_lib->vba.TimeCalc,
-										mode_lib->vba.TWait,
-										&mode_lib->vba.SrcActiveDrainRate,
-										&mode_lib->vba.TInitXFill,
-										&mode_lib->vba.TslvChk);
-					} else {
-						mode_lib->vba.XFCRemoteSurfaceFlipDelay = 0.0;
-					}
+				for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++)
+					CalculatePrefetchSchedulePerPlane(mode_lib, i, j, k);
 
-					myPipe.DPPCLK = locals->RequiredDPPCLK[i][j][k];
-					myPipe.DISPCLK = locals->RequiredDISPCLK[i][j];
-					myPipe.PixelClock = mode_lib->vba.PixelClock[k];
-					myPipe.DCFCLKDeepSleep = mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0];
-					myPipe.DPPPerPlane = locals->NoOfDPP[i][j][k];
-					myPipe.ScalerEnabled = mode_lib->vba.ScalerEnabled[k];
-					myPipe.SourceScan = mode_lib->vba.SourceScan[k];
-					myPipe.BlockWidth256BytesY = locals->Read256BlockWidthY[k];
-					myPipe.BlockHeight256BytesY = locals->Read256BlockHeightY[k];
-					myPipe.BlockWidth256BytesC = locals->Read256BlockWidthC[k];
-					myPipe.BlockHeight256BytesC = locals->Read256BlockHeightC[k];
-					myPipe.InterlaceEnable = mode_lib->vba.Interlace[k];
-					myPipe.NumberOfCursors = mode_lib->vba.NumberOfCursors[k];
-					myPipe.VBlank = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k];
-					myPipe.HTotal = mode_lib->vba.HTotal[k];
-
-
-					myHostVM.Enable = mode_lib->vba.HostVMEnable;
-					myHostVM.MaxPageTableLevels = mode_lib->vba.HostVMMaxPageTableLevels;
-					myHostVM.CachedPageTableLevels = mode_lib->vba.HostVMCachedPageTableLevels;
-
-
-					mode_lib->vba.IsErrorResult[i][j][k] = CalculatePrefetchSchedule(
-							mode_lib,
-							mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
-							mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
-							&myPipe,
-							locals->DSCDelayPerState[i][k],
-							mode_lib->vba.DPPCLKDelaySubtotal,
-							mode_lib->vba.DPPCLKDelaySCL,
-							mode_lib->vba.DPPCLKDelaySCLLBOnly,
-							mode_lib->vba.DPPCLKDelayCNVCFormater,
-							mode_lib->vba.DPPCLKDelayCNVCCursor,
-							mode_lib->vba.DISPCLKDelaySubtotal,
-							locals->SwathWidthYThisState[k] / mode_lib->vba.HRatio[k],
-							mode_lib->vba.OutputFormat[k],
-							mode_lib->vba.MaxInterDCNTileRepeaters,
-							dml_min(mode_lib->vba.MaxVStartup, locals->MaximumVStartup[0][0][k]),
-							locals->MaximumVStartup[0][0][k],
-							mode_lib->vba.GPUVMMaxPageTableLevels,
-							mode_lib->vba.GPUVMEnable,
-							&myHostVM,
-							mode_lib->vba.DynamicMetadataEnable[k],
-							mode_lib->vba.DynamicMetadataLinesBeforeActiveRequired[k],
-							mode_lib->vba.DynamicMetadataTransmittedBytes[k],
-							mode_lib->vba.DCCEnable[k],
-							mode_lib->vba.UrgentLatency,
-							mode_lib->vba.ExtraLatency,
-							mode_lib->vba.TimeCalc,
-							locals->PDEAndMetaPTEBytesPerFrame[0][0][k],
-							locals->MetaRowBytes[0][0][k],
-							locals->DPTEBytesPerRow[0][0][k],
-							locals->PrefetchLinesY[0][0][k],
-							locals->SwathWidthYThisState[k],
-							locals->BytePerPixelInDETY[k],
-							locals->PrefillY[k],
-							locals->MaxNumSwY[k],
-							locals->PrefetchLinesC[0][0][k],
-							locals->BytePerPixelInDETC[k],
-							locals->PrefillC[k],
-							locals->MaxNumSwC[k],
-							locals->SwathHeightYThisState[k],
-							locals->SwathHeightCThisState[k],
-							mode_lib->vba.TWait,
-							mode_lib->vba.XFCEnabled[k],
-							mode_lib->vba.XFCRemoteSurfaceFlipDelay,
-							mode_lib->vba.ProgressiveToInterlaceUnitInOPP,
-							&locals->dst_x_after_scaler,
-							&locals->dst_y_after_scaler,
-							&locals->LineTimesForPrefetch[k],
-							&locals->PrefetchBW[k],
-							&locals->LinesForMetaPTE[k],
-							&locals->LinesForMetaAndDPTERow[k],
-							&locals->VRatioPreY[i][j][k],
-							&locals->VRatioPreC[i][j][k],
-							&locals->RequiredPrefetchPixelDataBWLuma[i][j][k],
-							&locals->RequiredPrefetchPixelDataBWChroma[i][j][k],
-							&locals->VStartupRequiredWhenNotEnoughTimeForDynamicMetadata,
-							&locals->Tno_bw[k],
-							&locals->prefetch_vmrow_bw[k],
-							locals->swath_width_luma_ub,
-							locals->swath_width_chroma_ub,
-							&mode_lib->vba.VUpdateOffsetPix[k],
-							&mode_lib->vba.VUpdateWidthPix[k],
-							&mode_lib->vba.VReadyOffsetPix[k]);
-				}
 				mode_lib->vba.MaximumReadBandwidthWithoutPrefetch = 0.0;
 				mode_lib->vba.MaximumReadBandwidthWithPrefetch = 0.0;
 				for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
-- 
2.33.0

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ