From 642d3a2bf234578da83ed1fb6ef87dba36d31dea Mon Sep 17 00:00:00 2001 From: Wenjing Liu Date: Fri, 9 Apr 2021 14:55:18 -0400 Subject: [PATCH] drm/amd/display: take max dsc stream bandwidth overhead into account [why] As hardware team suggested that we need to add a max dsc bw overhead into existing stream bandwidth when DSC is used. The formula as below: max_dsc_bw_overhead = v_addressable * slice_count * 256 bit * pixel clock / v_total / h_total effective stream bandwidth = pixel clock * bpp stream bandwidth = effective stream bandwidth + dsc stream overhead Signed-off-by: Wenjing Liu Reviewed-by: Eric Bernstein Acked-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link.c | 7 +- drivers/gpu/drm/amd/display/dc/dc_dsc.h | 5 +- drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c | 113 ++++++++++++++---- drivers/gpu/drm/amd/display/dc/dsc/rc_calc.c | 43 ------- drivers/gpu/drm/amd/display/dc/dsc/rc_calc.h | 2 - 5 files changed, 94 insertions(+), 76 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index 931fbb4d6169..8bbed9c90c5d 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -3486,9 +3486,10 @@ uint32_t dc_bandwidth_in_kbps_from_timing( uint32_t kbps; #if defined(CONFIG_DRM_AMD_DC_DCN) - if (timing->flags.DSC) { - return dc_dsc_stream_bandwidth_in_kbps(timing->pix_clk_100hz, timing->dsc_cfg.bits_per_pixel); - } + if (timing->flags.DSC) + return dc_dsc_stream_bandwidth_in_kbps(timing, + timing->dsc_cfg.bits_per_pixel, + timing->dsc_cfg.num_slices_h); #endif switch (timing->display_color_depth) { diff --git a/drivers/gpu/drm/amd/display/dc/dc_dsc.h b/drivers/gpu/drm/amd/display/dc/dc_dsc.h index c51d2d961b7a..afddb8b7d3e4 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dsc.h +++ b/drivers/gpu/drm/amd/display/dc/dc_dsc.h @@ -78,7 +78,8 @@ bool dc_dsc_compute_config( const struct dc_crtc_timing *timing, struct dc_dsc_config *dsc_cfg); -uint32_t dc_dsc_stream_bandwidth_in_kbps(uint32_t pix_clk_100hz, uint32_t bpp_x16); +uint32_t dc_dsc_stream_bandwidth_in_kbps(const struct dc_crtc_timing *timing, + uint32_t bpp_x16, uint32_t num_slices_h); void dc_dsc_get_policy_for_timing(const struct dc_crtc_timing *timing, uint32_t max_target_bpp_limit_override_x16, @@ -88,6 +89,4 @@ void dc_dsc_policy_set_max_target_bpp_limit(uint32_t limit); void dc_dsc_policy_set_enable_dsc_when_not_needed(bool enable); -uint32_t dc_dsc_stream_bandwidth_in_kbps(uint32_t pix_clk_100hz, uint32_t bpp_x16); - #endif diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c index be57088d185d..bfe3ad58070a 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c +++ b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c @@ -258,12 +258,58 @@ static inline uint32_t dsc_div_by_10_round_up(uint32_t value) return (value + 9) / 10; } +static struct fixed31_32 compute_dsc_max_bandwidth_overhead( + const struct dc_crtc_timing *timing, + const int num_slices_h) +{ + struct fixed31_32 max_dsc_overhead; + struct fixed31_32 refresh_rate; + + /* use target bpp that can take entire target bandwidth */ + refresh_rate = dc_fixpt_from_int(timing->pix_clk_100hz); + refresh_rate = dc_fixpt_div_int(refresh_rate, timing->h_total); + refresh_rate = dc_fixpt_div_int(refresh_rate, timing->v_total); + refresh_rate = dc_fixpt_mul_int(refresh_rate, 100); + + max_dsc_overhead = dc_fixpt_from_int(num_slices_h); + max_dsc_overhead = dc_fixpt_mul_int(max_dsc_overhead, timing->v_addressable); + max_dsc_overhead = dc_fixpt_mul_int(max_dsc_overhead, 256); + max_dsc_overhead = dc_fixpt_div_int(max_dsc_overhead, 1000); + max_dsc_overhead = dc_fixpt_mul(max_dsc_overhead, refresh_rate); + + return max_dsc_overhead; +} + +static uint32_t compute_bpp_x16_from_target_bandwidth( + const uint32_t bandwidth_in_kbps, + const struct dc_crtc_timing *timing, + const uint32_t num_slices_h, + const uint32_t bpp_increment_div) +{ + struct fixed31_32 overhead_in_kbps; + struct fixed31_32 effective_bandwidth_in_kbps; + struct fixed31_32 bpp_x16; + + overhead_in_kbps = compute_dsc_max_bandwidth_overhead( + timing, num_slices_h); + effective_bandwidth_in_kbps = dc_fixpt_from_int(bandwidth_in_kbps); + effective_bandwidth_in_kbps = dc_fixpt_sub(effective_bandwidth_in_kbps, + overhead_in_kbps); + bpp_x16 = dc_fixpt_mul_int(effective_bandwidth_in_kbps, 10); + bpp_x16 = dc_fixpt_div_int(bpp_x16, timing->pix_clk_100hz); + bpp_x16 = dc_fixpt_from_int(dc_fixpt_floor(dc_fixpt_mul_int(bpp_x16, bpp_increment_div))); + bpp_x16 = dc_fixpt_div_int(bpp_x16, bpp_increment_div); + bpp_x16 = dc_fixpt_mul_int(bpp_x16, 16); + return dc_fixpt_floor(bpp_x16); +} + /* Get DSC bandwidth range based on [min_bpp, max_bpp] target bitrate range, and timing's pixel clock * and uncompressed bandwidth. */ static void get_dsc_bandwidth_range( const uint32_t min_bpp_x16, const uint32_t max_bpp_x16, + const uint32_t num_slices_h, const struct dsc_enc_caps *dsc_caps, const struct dc_crtc_timing *timing, struct dc_dsc_bw_range *range) @@ -272,16 +318,20 @@ static void get_dsc_bandwidth_range( range->stream_kbps = dc_bandwidth_in_kbps_from_timing(timing); /* max dsc target bpp */ - range->max_kbps = dc_dsc_stream_bandwidth_in_kbps(timing->pix_clk_100hz, max_bpp_x16); + range->max_kbps = dc_dsc_stream_bandwidth_in_kbps(timing, + max_bpp_x16, num_slices_h); range->max_target_bpp_x16 = max_bpp_x16; if (range->max_kbps > range->stream_kbps) { /* max dsc target bpp is capped to native bandwidth */ range->max_kbps = range->stream_kbps; - range->max_target_bpp_x16 = calc_dsc_bpp_x16(range->stream_kbps, timing->pix_clk_100hz, dsc_caps->bpp_increment_div); + range->max_target_bpp_x16 = compute_bpp_x16_from_target_bandwidth( + range->max_kbps, timing, num_slices_h, + dsc_caps->bpp_increment_div); } /* min dsc target bpp */ - range->min_kbps = dc_dsc_stream_bandwidth_in_kbps(timing->pix_clk_100hz, min_bpp_x16); + range->min_kbps = dc_dsc_stream_bandwidth_in_kbps(timing, + min_bpp_x16, num_slices_h); range->min_target_bpp_x16 = min_bpp_x16; if (range->min_kbps > range->max_kbps) { /* min dsc target bpp is capped to max dsc bandwidth*/ @@ -290,7 +340,6 @@ static void get_dsc_bandwidth_range( } } - /* Decides if DSC should be used and calculates target bpp if it should, applying DSC policy. * * Returns: @@ -303,6 +352,7 @@ static bool decide_dsc_target_bpp_x16( const struct dsc_enc_caps *dsc_common_caps, const int target_bandwidth_kbps, const struct dc_crtc_timing *timing, + const int num_slices_h, int *target_bpp_x16) { bool should_use_dsc = false; @@ -311,7 +361,7 @@ static bool decide_dsc_target_bpp_x16( memset(&range, 0, sizeof(range)); get_dsc_bandwidth_range(policy->min_target_bpp * 16, policy->max_target_bpp * 16, - dsc_common_caps, timing, &range); + num_slices_h, dsc_common_caps, timing, &range); if (!policy->enable_dsc_when_not_needed && target_bandwidth_kbps >= range.stream_kbps) { /* enough bandwidth without dsc */ *target_bpp_x16 = 0; @@ -327,7 +377,11 @@ static bool decide_dsc_target_bpp_x16( should_use_dsc = true; } else if (target_bandwidth_kbps >= range.min_kbps) { /* use target bpp that can take entire target bandwidth */ - *target_bpp_x16 = calc_dsc_bpp_x16(target_bandwidth_kbps, timing->pix_clk_100hz, dsc_common_caps->bpp_increment_div); + *target_bpp_x16 = compute_bpp_x16_from_target_bandwidth( + range.max_kbps, timing, num_slices_h, + dsc_common_caps->bpp_increment_div); + if (*target_bpp_x16 < range.min_kbps) + *target_bpp_x16 = range.min_kbps; should_use_dsc = true; } else { /* not enough bandwidth to fulfill minimum requirement */ @@ -531,18 +585,6 @@ static bool setup_dsc_config( if (!is_dsc_possible) goto done; - if (target_bandwidth_kbps > 0) { - is_dsc_possible = decide_dsc_target_bpp_x16( - &policy, - &dsc_common_caps, - target_bandwidth_kbps, - timing, - &target_bpp); - dsc_cfg->bits_per_pixel = target_bpp; - } - if (!is_dsc_possible) - goto done; - sink_per_slice_throughput_mps = 0; // Validate available DSC settings against the mode timing @@ -690,6 +732,19 @@ static bool setup_dsc_config( dsc_cfg->num_slices_v = pic_height/slice_height; + if (target_bandwidth_kbps > 0) { + is_dsc_possible = decide_dsc_target_bpp_x16( + &policy, + &dsc_common_caps, + target_bandwidth_kbps, + timing, + num_slices_h, + &target_bpp); + dsc_cfg->bits_per_pixel = target_bpp; + } + if (!is_dsc_possible) + goto done; + // Final decission: can we do DSC or not? if (is_dsc_possible) { // Fill out the rest of DSC settings @@ -838,7 +893,8 @@ bool dc_dsc_compute_bandwidth_range( dsc_min_slice_height_override, max_bpp_x16, &config); if (is_dsc_possible) - get_dsc_bandwidth_range(min_bpp_x16, max_bpp_x16, &dsc_common_caps, timing, range); + get_dsc_bandwidth_range(min_bpp_x16, max_bpp_x16, + config.num_slices_h, &dsc_common_caps, timing, range); return is_dsc_possible; } @@ -864,13 +920,20 @@ bool dc_dsc_compute_config( return is_dsc_possible; } -uint32_t dc_dsc_stream_bandwidth_in_kbps(uint32_t pix_clk_100hz, uint32_t bpp_x16) +uint32_t dc_dsc_stream_bandwidth_in_kbps(const struct dc_crtc_timing *timing, + uint32_t bpp_x16, uint32_t num_slices_h) { - struct fixed31_32 link_bw_kbps; - link_bw_kbps = dc_fixpt_from_int(pix_clk_100hz); - link_bw_kbps = dc_fixpt_div_int(link_bw_kbps, 160); - link_bw_kbps = dc_fixpt_mul_int(link_bw_kbps, bpp_x16); - return dc_fixpt_ceil(link_bw_kbps); + struct fixed31_32 overhead_in_kbps; + struct fixed31_32 bpp; + struct fixed31_32 actual_bandwidth_in_kbps; + + overhead_in_kbps = compute_dsc_max_bandwidth_overhead( + timing, num_slices_h); + bpp = dc_fixpt_from_fraction(bpp_x16, 16); + actual_bandwidth_in_kbps = dc_fixpt_from_fraction(timing->pix_clk_100hz, 10); + actual_bandwidth_in_kbps = dc_fixpt_mul(actual_bandwidth_in_kbps, bpp); + actual_bandwidth_in_kbps = dc_fixpt_add(actual_bandwidth_in_kbps, overhead_in_kbps); + return dc_fixpt_ceil(actual_bandwidth_in_kbps); } void dc_dsc_get_policy_for_timing(const struct dc_crtc_timing *timing, uint32_t max_target_bpp_limit_override_x16, struct dc_dsc_policy *policy) diff --git a/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.c b/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.c index c6a1cd80aeae..7b294f637881 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.c +++ b/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.c @@ -284,26 +284,6 @@ static u32 _do_bytes_per_pixel_calc(int slice_width, u16 drm_bpp, return bytes_per_pixel; } -static u32 _do_calc_dsc_bpp_x16(u32 stream_bandwidth_kbps, u32 pix_clk_100hz, - u32 bpp_increment_div) -{ - u32 dsc_target_bpp_x16; - float f_dsc_target_bpp; - float f_stream_bandwidth_100bps; - // bpp_increment_div is actually precision - u32 precision = bpp_increment_div; - - f_stream_bandwidth_100bps = stream_bandwidth_kbps * 10.0f; - f_dsc_target_bpp = f_stream_bandwidth_100bps / pix_clk_100hz; - - // Round down to the nearest precision stop to bring it into DSC spec - // range - dsc_target_bpp_x16 = (u32)(f_dsc_target_bpp * precision); - dsc_target_bpp_x16 = (dsc_target_bpp_x16 * 16) / precision; - - return dsc_target_bpp_x16; -} - /** * calc_rc_params - reads the user's cmdline mode * @rc: DC internal DSC parameters @@ -367,26 +347,3 @@ u32 calc_dsc_bytes_per_pixel(const struct drm_dsc_config *pps) DC_FP_END(); return ret; } - -/** - * calc_dsc_bpp_x16 - retrieve the dsc bits per pixel - * @stream_bandwidth_kbps: - * @pix_clk_100hz: - * @bpp_increment_div: - * - * Calculate the total of bits per pixel for DSC configuration. - * - * @note This calculation requires float point operation, most of it executes - * under kernel_fpu_{begin,end}. - */ -u32 calc_dsc_bpp_x16(u32 stream_bandwidth_kbps, u32 pix_clk_100hz, - u32 bpp_increment_div) -{ - u32 dsc_bpp; - - DC_FP_START(); - dsc_bpp = _do_calc_dsc_bpp_x16(stream_bandwidth_kbps, pix_clk_100hz, - bpp_increment_div); - DC_FP_END(); - return dsc_bpp; -} diff --git a/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.h b/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.h index 8123827840c5..262f06afcbf9 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.h +++ b/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.h @@ -79,8 +79,6 @@ typedef struct qp_entry qp_table[]; void calc_rc_params(struct rc_params *rc, const struct drm_dsc_config *pps); u32 calc_dsc_bytes_per_pixel(const struct drm_dsc_config *pps); -u32 calc_dsc_bpp_x16(u32 stream_bandwidth_kbps, u32 pix_clk_100hz, - u32 bpp_increment_div); #endif -- 2.39.5