@@ -1736,7 +1736,7 @@ static void CalculateBytePerPixelAndBlockSizes(
#endif
} // CalculateBytePerPixelAndBlockSizes
-static dml_float_t CalculateTWait(
+static noinline_for_stack dml_float_t CalculateTWait(
dml_uint_t PrefetchMode,
enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange,
dml_bool_t SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
@@ -4458,7 +4458,7 @@ static void CalculateSwathWidth(
}
} // CalculateSwathWidth
-static dml_float_t CalculateExtraLatency(
+static noinline_for_stack dml_float_t CalculateExtraLatency(
dml_uint_t RoundTripPingLatencyCycles,
dml_uint_t ReorderingBytes,
dml_float_t DCFCLK,
@@ -5915,7 +5915,7 @@ static dml_uint_t DSCDelayRequirement(
return DSCDelayRequirement_val;
}
-static dml_bool_t CalculateVActiveBandwithSupport(dml_uint_t NumberOfActiveSurfaces,
+static noinline_for_stack dml_bool_t CalculateVActiveBandwithSupport(dml_uint_t NumberOfActiveSurfaces,
dml_float_t ReturnBW,
dml_bool_t NotUrgentLatencyHiding[],
dml_float_t ReadBandwidthLuma[],
@@ -6019,7 +6019,7 @@ static void CalculatePrefetchBandwithSupport(
#endif
}
-static dml_float_t CalculateBandwidthAvailableForImmediateFlip(
+static noinline_for_stack dml_float_t CalculateBandwidthAvailableForImmediateFlip(
dml_uint_t NumberOfActiveSurfaces,
dml_float_t ReturnBW,
dml_float_t ReadBandwidthLuma[],
@@ -6213,7 +6213,7 @@ static dml_uint_t CalculateMaxVStartup(
return max_vstartup_lines;
}
-static void set_calculate_prefetch_schedule_params(struct display_mode_lib_st *mode_lib,
+static noinline_for_stack void set_calculate_prefetch_schedule_params(struct display_mode_lib_st *mode_lib,
struct CalculatePrefetchSchedule_params_st *CalculatePrefetchSchedule_params,
dml_uint_t j,
dml_uint_t k)
@@ -6265,7 +6265,7 @@ static void set_calculate_prefetch_schedule_params(struct display_mode_lib_st *m
CalculatePrefetchSchedule_params->Tno_bw = &mode_lib->ms.Tno_bw[k];
}
-static void dml_prefetch_check(struct display_mode_lib_st *mode_lib)
+static noinline_for_stack void dml_prefetch_check(struct display_mode_lib_st *mode_lib)
{
struct dml_core_mode_support_locals_st *s = &mode_lib->scratch.dml_core_mode_support_locals;
struct CalculatePrefetchSchedule_params_st *CalculatePrefetchSchedule_params = &mode_lib->scratch.CalculatePrefetchSchedule_params;
@@ -2778,7 +2778,7 @@ static double dml_get_return_bandwidth_available(
return return_bw_mbps;
}
-static void calculate_bandwidth_available(
+static noinline_for_stack void calculate_bandwidth_available(
double avg_bandwidth_available_min[dml2_core_internal_soc_state_max],
double avg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
double urg_bandwidth_available_min[dml2_core_internal_soc_state_max], // min between SDP and DRAM
@@ -3531,7 +3531,7 @@ static void CalculateUrgentBurstFactor(
}
-static void CalculateDCFCLKDeepSleep(
+static noinline_for_stack void CalculateDCFCLKDeepSleep(
const struct dml2_display_cfg *display_cfg,
unsigned int NumberOfActiveSurfaces,
unsigned int BytePerPixelY[],
@@ -4076,7 +4076,7 @@ static bool ValidateODMMode(enum dml2_odm_mode ODMMode,
return true;
}
-static void CalculateODMMode(
+static noinline_for_stack void CalculateODMMode(
unsigned int MaximumPixelsPerLinePerDSCUnit,
unsigned int HActive,
enum dml2_output_format_class OutFormat,
@@ -4173,7 +4173,7 @@ static void CalculateODMMode(
#endif
}
-static void CalculateOutputLink(
+static noinline_for_stack void CalculateOutputLink(
struct dml2_core_internal_scratch *s,
double PHYCLK,
double PHYCLKD18,
@@ -5928,7 +5928,7 @@ static double calculate_impacted_Tsw(unsigned int exclude_plane_idx, unsigned in
}
// a global check against the aggregate effect of the per plane prefetch schedule
-static bool CheckGlobalPrefetchAdmissibility(struct dml2_core_internal_scratch *scratch,
+static noinline_for_stack bool CheckGlobalPrefetchAdmissibility(struct dml2_core_internal_scratch *scratch,
struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_params *p)
{
struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_locals *s = &scratch->CheckGlobalPrefetchAdmissibility_locals;
@@ -6941,7 +6941,7 @@ static void calculate_bytes_to_fetch_required_to_hide_latency(
}
}
-static void calculate_vactive_det_fill_latency(
+static noinline_for_stack void calculate_vactive_det_fill_latency(
const struct dml2_display_cfg *display_cfg,
unsigned int num_active_planes,
unsigned int bytes_required_l[],
When compiling allmodconfig (CONFIG_WERROR=y) with clang-19, see the following errors: .../display/dc/dml2/display_mode_core.c:6268:13: warning: stack frame size (3128) exceeds limit (3072) in 'dml_prefetch_check' [-Wframe-larger-than] .../display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c:7236:13: warning: stack frame size (3256) exceeds limit (3072) in 'dml_core_mode_support' [-Wframe-larger-than] Mark static functions called by dml_prefetch_check() and dml_core_mode_support() noinline_for_stack to avoid them become huge functions and thus exceed the frame size limit. A way to reproduce: $ git checkout next-20250107 $ mkdir build_dir $ export PATH=/tmp/llvm-19.1.6-x86_64/bin:$PATH $ make LLVM=1 O=build_dir allmodconfig $ make LLVM=1 O=build_dir drivers/gpu/drm/ -j The way how it chose static functions to mark: [0] Unset CONFIG_WERROR in build_dir/.config. To get display_mode_core.o without errors. [1] Get a function list called by dml_prefetch_check(). $ sed -n '6268,6711p' drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c \ | sed -n -r 's/.*\W(\w+)\(.*/\1/p' | sort -u >/tmp/syms [2] Get the non-inline function list. Objdump won't show the symbols if they are inline functions. $ make LLVM=1 O=build_dir drivers/gpu/drm/ -j $ objdump -d build_dir/.../display_mode_core.o | \ ./scripts/checkstack.pl x86_64 0 | \ grep -f /tmp/syms | cut -d' ' -f2- >/tmp/orig [3] Get the full function list. Append "-fno-inline" to `CFLAGS_.../display_mode_core.o` in drivers/gpu/drm/amd/display/dc/dml2/Makefile. $ make LLVM=1 O=build_dir drivers/gpu/drm/ -j $ objdump -d build_dir/.../display_mode_core.o | \ ./scripts/checkstack.pl x86_64 0 | \ grep -f /tmp/syms | cut -d' ' -f2- >/tmp/noinline [4] Get the inline function list. If a symbol only in /tmp/noinline but not in /tmp/orig, it is a good candidate to mark noinline. $ diff /tmp/orig /tmp/noinline Chosen functions and their stack sizes: CalculateBandwidthAvailableForImmediateFlip [display_mode_core.o]:144 CalculateExtraLatency [display_mode_core.o]:176 CalculateTWait [display_mode_core.o]:64 CalculateVActiveBandwithSupport [display_mode_core.o]:112 set_calculate_prefetch_schedule_params [display_mode_core.o]:48 CheckGlobalPrefetchAdmissibility [dml2_core_dcn4_calcs.o]:544 calculate_bandwidth_available [dml2_core_dcn4_calcs.o]:320 calculate_vactive_det_fill_latency [dml2_core_dcn4_calcs.o]:272 CalculateDCFCLKDeepSleep [dml2_core_dcn4_calcs.o]:208 CalculateODMMode [dml2_core_dcn4_calcs.o]:208 CalculateOutputLink [dml2_core_dcn4_calcs.o]:176 Signed-off-by: Tzung-Bi Shih <tzungbi@kernel.org> --- .../gpu/drm/amd/display/dc/dml2/display_mode_core.c | 12 ++++++------ .../dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-)