Message ID | 20160909080106.17506-6-mahesh1.kumar@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Op 09-09-16 om 10:01 schreef Kumar, Mahesh: > From: Mahesh Kumar <mahesh1.kumar@intel.com> > > This patch implemnets Workarounds related to display arbitrated memory > bandwidth. These WA are applicabe for all gen-9 based platforms. > > Changes since v1: > - Rebase on top of Paulo's patch series > > Signed-off-by: Mahesh Kumar <mahesh1.kumar@intel.com> > --- > drivers/gpu/drm/i915/i915_drv.h | 9 +++ > drivers/gpu/drm/i915/intel_drv.h | 11 +++ > drivers/gpu/drm/i915/intel_pm.c | 145 +++++++++++++++++++++++++++++++++++++++ > 3 files changed, 165 insertions(+) > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index 4313992..4737a0e 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -1074,6 +1074,13 @@ enum intel_sbi_destination { > SBI_MPHY, > }; > > +/* SKL+ Watermark arbitrated display bandwidth Workarounds */ > +enum watermark_memory_wa { > + WATERMARK_WA_NONE, > + WATERMARK_WA_X_TILED, > + WATERMARK_WA_Y_TILED, > +}; > + > #define QUIRK_PIPEA_FORCE (1<<0) > #define QUIRK_LVDS_SSC_DISABLE (1<<1) > #define QUIRK_INVERT_BRIGHTNESS (1<<2) > @@ -1623,6 +1630,8 @@ struct skl_ddb_allocation { > > struct skl_wm_values { > unsigned dirty_pipes; > + /* any WaterMark memory workaround Required */ > + enum watermark_memory_wa mem_wa; > struct skl_ddb_allocation ddb; > uint32_t wm_linetime[I915_MAX_PIPES]; > uint32_t plane[I915_MAX_PIPES][I915_MAX_PLANES][8]; > diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h > index 6cd7e8a..66cb46c 100644 > --- a/drivers/gpu/drm/i915/intel_drv.h > +++ b/drivers/gpu/drm/i915/intel_drv.h > @@ -1800,6 +1800,17 @@ intel_atomic_get_crtc_state(struct drm_atomic_state *state, > return to_intel_crtc_state(crtc_state); > } > > +static inline struct intel_crtc_state * > +intel_atomic_get_existing_crtc_state(struct drm_atomic_state *state, > + struct intel_crtc *crtc) > +{ > + struct drm_crtc_state *crtc_state; > + > + crtc_state = 
drm_atomic_get_existing_crtc_state(state, &crtc->base); > + > + return to_intel_crtc_state(crtc_state); > +} > + > static inline struct intel_plane_state * > intel_atomic_get_existing_plane_state(struct drm_atomic_state *state, > struct intel_plane *plane) > diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c > index 7c70e07..0ec328b 100644 > --- a/drivers/gpu/drm/i915/intel_pm.c > +++ b/drivers/gpu/drm/i915/intel_pm.c > @@ -3589,6 +3589,8 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, > { > struct drm_plane_state *pstate = &intel_pstate->base; > struct drm_framebuffer *fb = pstate->fb; > + struct intel_atomic_state *intel_state = > + to_intel_atomic_state(cstate->base.state); > uint32_t latency = dev_priv->wm.skl_latency[level]; > uint32_t method1, method2; > uint32_t plane_bytes_per_line, plane_blocks_per_line; > @@ -3602,10 +3604,17 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, > struct skl_wm_level *result = &pipe_wm->wm[level]; > uint16_t *out_blocks = &result->plane_res_b[id]; > uint8_t *out_lines = &result->plane_res_l[id]; > + enum watermark_memory_wa mem_wa; > > if (latency == 0 || !cstate->base.active || !intel_pstate->base.visible) > return 0; > > + mem_wa = intel_state ? intel_state->wm_results.mem_wa : WATERMARK_WA_NONE; > + if (mem_wa != WATERMARK_WA_NONE) { > + if (fb->modifier[0] == I915_FORMAT_MOD_X_TILED) > + latency += 15; > + } > + > width = drm_rect_width(&intel_pstate->base.src) >> 16; > height = drm_rect_height(&intel_pstate->base.src) >> 16; > > @@ -3637,6 +3646,9 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, > y_min_scanlines = 4; > } > > + if (mem_wa == WATERMARK_WA_Y_TILED) > + y_min_scanlines *= 2; > + > plane_bytes_per_line = width * cpp; > if (fb->modifier[0] == I915_FORMAT_MOD_Y_TILED || > fb->modifier[0] == I915_FORMAT_MOD_Yf_TILED) { I don't have y_min_scanlines in nightly? What is this series based on? 
It doesn't apply cleanly, at least. ~Maarten
Op 12-09-16 om 13:02 schreef Maarten Lankhorst: > Op 09-09-16 om 10:01 schreef Kumar, Mahesh: >> From: Mahesh Kumar <mahesh1.kumar@intel.com> >> >> This patch implemnets Workarounds related to display arbitrated memory >> bandwidth. These WA are applicabe for all gen-9 based platforms. >> >> Changes since v1: >> - Rebase on top of Paulo's patch series >> >> Signed-off-by: Mahesh Kumar <mahesh1.kumar@intel.com> >> --- >> drivers/gpu/drm/i915/i915_drv.h | 9 +++ >> drivers/gpu/drm/i915/intel_drv.h | 11 +++ >> drivers/gpu/drm/i915/intel_pm.c | 145 +++++++++++++++++++++++++++++++++++++++ >> 3 files changed, 165 insertions(+) >> >> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h >> index 4313992..4737a0e 100644 >> --- a/drivers/gpu/drm/i915/i915_drv.h >> +++ b/drivers/gpu/drm/i915/i915_drv.h >> @@ -1074,6 +1074,13 @@ enum intel_sbi_destination { >> SBI_MPHY, >> }; >> >> +/* SKL+ Watermark arbitrated display bandwidth Workarounds */ >> +enum watermark_memory_wa { >> + WATERMARK_WA_NONE, >> + WATERMARK_WA_X_TILED, >> + WATERMARK_WA_Y_TILED, >> +}; >> + >> #define QUIRK_PIPEA_FORCE (1<<0) >> #define QUIRK_LVDS_SSC_DISABLE (1<<1) >> #define QUIRK_INVERT_BRIGHTNESS (1<<2) >> @@ -1623,6 +1630,8 @@ struct skl_ddb_allocation { >> >> struct skl_wm_values { >> unsigned dirty_pipes; >> + /* any WaterMark memory workaround Required */ >> + enum watermark_memory_wa mem_wa; >> struct skl_ddb_allocation ddb; >> uint32_t wm_linetime[I915_MAX_PIPES]; >> uint32_t plane[I915_MAX_PIPES][I915_MAX_PLANES][8]; >> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h >> index 6cd7e8a..66cb46c 100644 >> --- a/drivers/gpu/drm/i915/intel_drv.h >> +++ b/drivers/gpu/drm/i915/intel_drv.h >> @@ -1800,6 +1800,17 @@ intel_atomic_get_crtc_state(struct drm_atomic_state *state, >> return to_intel_crtc_state(crtc_state); >> } >> >> +static inline struct intel_crtc_state * >> +intel_atomic_get_existing_crtc_state(struct drm_atomic_state *state, >> + 
struct intel_crtc *crtc) >> +{ >> + struct drm_crtc_state *crtc_state; >> + >> + crtc_state = drm_atomic_get_existing_crtc_state(state, &crtc->base); >> + >> + return to_intel_crtc_state(crtc_state); >> +} >> + >> static inline struct intel_plane_state * >> intel_atomic_get_existing_plane_state(struct drm_atomic_state *state, >> struct intel_plane *plane) >> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c >> index 7c70e07..0ec328b 100644 >> --- a/drivers/gpu/drm/i915/intel_pm.c >> +++ b/drivers/gpu/drm/i915/intel_pm.c >> @@ -3589,6 +3589,8 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, >> { >> struct drm_plane_state *pstate = &intel_pstate->base; >> struct drm_framebuffer *fb = pstate->fb; >> + struct intel_atomic_state *intel_state = >> + to_intel_atomic_state(cstate->base.state); >> uint32_t latency = dev_priv->wm.skl_latency[level]; >> uint32_t method1, method2; >> uint32_t plane_bytes_per_line, plane_blocks_per_line; >> @@ -3602,10 +3604,17 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, >> struct skl_wm_level *result = &pipe_wm->wm[level]; >> uint16_t *out_blocks = &result->plane_res_b[id]; >> uint8_t *out_lines = &result->plane_res_l[id]; >> + enum watermark_memory_wa mem_wa; >> >> if (latency == 0 || !cstate->base.active || !intel_pstate->base.visible) >> return 0; >> >> + mem_wa = intel_state ? 
intel_state->wm_results.mem_wa : WATERMARK_WA_NONE; >> + if (mem_wa != WATERMARK_WA_NONE) { >> + if (fb->modifier[0] == I915_FORMAT_MOD_X_TILED) >> + latency += 15; >> + } >> + >> width = drm_rect_width(&intel_pstate->base.src) >> 16; >> height = drm_rect_height(&intel_pstate->base.src) >> 16; >> >> @@ -3637,6 +3646,9 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, >> y_min_scanlines = 4; >> } >> >> + if (mem_wa == WATERMARK_WA_Y_TILED) >> + y_min_scanlines *= 2; >> + >> plane_bytes_per_line = width * cpp; >> if (fb->modifier[0] == I915_FORMAT_MOD_Y_TILED || >> fb->modifier[0] == I915_FORMAT_MOD_Yf_TILED) { > I don't have y_min_scanlines in nightly? What is this series based on? > It doesn't apply cleanly at least.. Ah nevermind, applies on top of Paulo's series.
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 4313992..4737a0e 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1074,6 +1074,13 @@ enum intel_sbi_destination { SBI_MPHY, }; +/* SKL+ Watermark arbitrated display bandwidth Workarounds */ +enum watermark_memory_wa { + WATERMARK_WA_NONE, + WATERMARK_WA_X_TILED, + WATERMARK_WA_Y_TILED, +}; + #define QUIRK_PIPEA_FORCE (1<<0) #define QUIRK_LVDS_SSC_DISABLE (1<<1) #define QUIRK_INVERT_BRIGHTNESS (1<<2) @@ -1623,6 +1630,8 @@ struct skl_ddb_allocation { struct skl_wm_values { unsigned dirty_pipes; + /* any WaterMark memory workaround Required */ + enum watermark_memory_wa mem_wa; struct skl_ddb_allocation ddb; uint32_t wm_linetime[I915_MAX_PIPES]; uint32_t plane[I915_MAX_PIPES][I915_MAX_PLANES][8]; diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 6cd7e8a..66cb46c 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1800,6 +1800,17 @@ intel_atomic_get_crtc_state(struct drm_atomic_state *state, return to_intel_crtc_state(crtc_state); } +static inline struct intel_crtc_state * +intel_atomic_get_existing_crtc_state(struct drm_atomic_state *state, + struct intel_crtc *crtc) +{ + struct drm_crtc_state *crtc_state; + + crtc_state = drm_atomic_get_existing_crtc_state(state, &crtc->base); + + return to_intel_crtc_state(crtc_state); +} + static inline struct intel_plane_state * intel_atomic_get_existing_plane_state(struct drm_atomic_state *state, struct intel_plane *plane) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 7c70e07..0ec328b 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3589,6 +3589,8 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, { struct drm_plane_state *pstate = &intel_pstate->base; struct drm_framebuffer *fb = pstate->fb; + struct intel_atomic_state *intel_state = + 
to_intel_atomic_state(cstate->base.state); uint32_t latency = dev_priv->wm.skl_latency[level]; uint32_t method1, method2; uint32_t plane_bytes_per_line, plane_blocks_per_line; @@ -3602,10 +3604,17 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, struct skl_wm_level *result = &pipe_wm->wm[level]; uint16_t *out_blocks = &result->plane_res_b[id]; uint8_t *out_lines = &result->plane_res_l[id]; + enum watermark_memory_wa mem_wa; if (latency == 0 || !cstate->base.active || !intel_pstate->base.visible) return 0; + mem_wa = intel_state ? intel_state->wm_results.mem_wa : WATERMARK_WA_NONE; + if (mem_wa != WATERMARK_WA_NONE) { + if (fb->modifier[0] == I915_FORMAT_MOD_X_TILED) + latency += 15; + } + width = drm_rect_width(&intel_pstate->base.src) >> 16; height = drm_rect_height(&intel_pstate->base.src) >> 16; @@ -3637,6 +3646,9 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, y_min_scanlines = 4; } + if (mem_wa == WATERMARK_WA_Y_TILED) + y_min_scanlines *= 2; + plane_bytes_per_line = width * cpp; if (fb->modifier[0] == I915_FORMAT_MOD_Y_TILED || fb->modifier[0] == I915_FORMAT_MOD_Yf_TILED) { @@ -4041,6 +4053,15 @@ skl_include_affected_pipes(struct drm_atomic_state *state) intel_state->wm_results.dirty_pipes = ~0; } + /* + * If Watermark workaround is changed we need to recalculate + * watermark values for all active pipes + */ + if (intel_state->wm_results.mem_wa != dev_priv->wm.skl_hw.mem_wa) { + realloc_pipes = ~0; + intel_state->wm_results.dirty_pipes = ~0; + } + for_each_intel_crtc_mask(dev, intel_crtc, realloc_pipes) { struct intel_crtc_state *cstate; @@ -4057,6 +4078,128 @@ skl_include_affected_pipes(struct drm_atomic_state *state) } static void +skl_set_memory_bandwidth_wm_wa(struct drm_atomic_state *state) +{ + struct drm_device *dev = state->dev; + struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_crtc *intel_crtc; + struct intel_plane_state *intel_pstate; + struct intel_atomic_state *intel_state = 
to_intel_atomic_state(state); + int num_active_plane, num_active_pipe; + uint32_t plane_bw, max_plane_bw, pipe_bw, max_pipe_bw; + uint32_t total_pipe_bw; + uint32_t system_bw = 0; + uint8_t num_channel, data_width, rank; + int x_tile_per; + int display_bw_per; + bool y_tile_enabled = false; + + if (!dev_priv->memdev_info.valid) + goto exit; + + num_channel = dev_priv->memdev_info.num_channel; + data_width = dev_priv->memdev_info.data_width; + system_bw = dev_priv->memdev_info.mem_speed * num_channel * data_width; + + if (!system_bw) + goto exit; + + max_pipe_bw = 0; + for_each_intel_crtc(dev, intel_crtc) { + struct intel_crtc_state *cstate; + struct intel_plane *plane; + + /* + * If CRTC is part of current atomic commit, get crtc state from + * existing CRTC state. else take the cached CRTC state + */ + cstate = NULL; + if (state) + cstate = intel_atomic_get_existing_crtc_state(state, + intel_crtc); + if (!cstate) + cstate = to_intel_crtc_state(intel_crtc->base.state); + + if (!cstate->base.active) + continue; + + num_active_plane = 0; + max_plane_bw = 0; + for_each_intel_plane_mask(dev, plane, cstate->base.plane_mask) { + struct drm_framebuffer *fb = NULL; + + intel_pstate = NULL; + if (state) + intel_pstate = + intel_atomic_get_existing_plane_state(state, + plane); + if (!intel_pstate) + intel_pstate = + to_intel_plane_state(plane->base.state); + + WARN_ON(!intel_pstate->base.fb); + + if (!intel_pstate->base.visible) + continue; + + fb = intel_pstate->base.fb; + if (fb && (fb->modifier[0] == I915_FORMAT_MOD_Y_TILED || + fb->modifier[0] == I915_FORMAT_MOD_Yf_TILED)) + y_tile_enabled = true; + + plane_bw = skl_adjusted_plane_pixel_rate(cstate, + intel_pstate); + max_plane_bw = max(plane_bw, max_plane_bw); + num_active_plane++; + } + pipe_bw = max_plane_bw * num_active_plane; + max_pipe_bw = max(pipe_bw, max_pipe_bw); + } + + if (intel_state->active_pipe_changes) + num_active_pipe = hweight32(intel_state->active_crtcs); + else + num_active_pipe = 
hweight32(dev_priv->active_crtcs); + + total_pipe_bw = max_pipe_bw * num_active_pipe; + + display_bw_per = DIV_ROUND_UP_ULL(total_pipe_bw * 100, system_bw * 1000); + + /* + * If there is any Ytile plane enabled and arbitrated display + * bandwidth > 20% of raw system memory bandwidth + * Enale Y-tile related WA + * + * If memory is dual channel single rank, Xtile limit = 35%, else Xtile + * limit = 60% + * If there is no Ytile plane enabled and + * arbitrated display bandwidth > Xtile limit + * Enable X-tile realated WA + */ + if (y_tile_enabled && (display_bw_per > 20)) + intel_state->wm_results.mem_wa = WATERMARK_WA_Y_TILED; + else { + + if (dev_priv->memdev_info.rank_valid) + rank = dev_priv->memdev_info.rank; + else + rank = DRAM_DUAL_RANK; /* Assume we are dual rank */ + + if ((rank == DRAM_SINGLE_RANK) && (num_channel == 2)) + x_tile_per = 35; + else + x_tile_per = 60; + + if (display_bw_per > x_tile_per) + intel_state->wm_results.mem_wa = WATERMARK_WA_X_TILED; + } + return; + +exit: + intel_state->wm_results.mem_wa = WATERMARK_WA_NONE; +} + +static void skl_copy_wm_for_pipe(struct skl_wm_values *dst, struct skl_wm_values *src, enum pipe pipe) @@ -4101,6 +4244,8 @@ skl_compute_wm(struct drm_atomic_state *state) /* Clear all dirty flags */ results->dirty_pipes = 0; + skl_set_memory_bandwidth_wm_wa(state); + ret = skl_include_affected_pipes(state); if (ret) return ret;