diff mbox

[3/3] drm/i915/gen9: WM memory bandwidth related workaround

Message ID 20170131145720.11961-4-mahesh1.kumar@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Kumar, Mahesh Jan. 31, 2017, 2:57 p.m. UTC
This patch enables workarounds related to display arbitrated memory
bandwidth only if it's necessary. WA's are applicable for all GEN9
based platforms.

Changes since v1:
 - Rebase on top of Paulo's patch series
Changes since v2:
 - Address review comments
 - Rebase/rework as per other patch changes in series
Changes since v3:
 - Rebase the patch
 - introduce ww_mutex to protect WM operations
 - Protect system memory bandwidth calculation with ww_mutex

Signed-off-by: Mahesh Kumar <mahesh1.kumar@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.c      |   1 +
 drivers/gpu/drm/i915/i915_drv.h      |  15 ++++
 drivers/gpu/drm/i915/intel_display.c |  34 ++++++++
 drivers/gpu/drm/i915/intel_pm.c      | 163 +++++++++++++++++++++++++++++++----
 4 files changed, 194 insertions(+), 19 deletions(-)

Comments

kernel test robot Jan. 31, 2017, 5:08 p.m. UTC | #1
Hi Mahesh,

[auto build test WARNING on drm-intel/for-linux-next]
[also build test WARNING on next-20170130]
[cannot apply to v4.10-rc6]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Mahesh-Kumar/Enable-IPC-WM-related-WA-s/20170131-230708
base:   git://anongit.freedesktop.org/drm-intel for-linux-next
config: x86_64-rhel (attached as .config)
compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901
reproduce:
        # save the attached .config to linux build tree
        make ARCH=x86_64 

All warnings (new ones prefixed by >>):

   drivers/gpu/drm/i915/intel_pm.c: In function 'skl_compute_memory_bandwidth_wm_wa':
>> drivers/gpu/drm/i915/intel_pm.c:4118:56: warning: argument to 'sizeof' in 'memcpy' call is the same expression as the destination; did you mean to dereference it? [-Wsizeof-pointer-memaccess]
     memcpy(mem_attr, &dev_priv->wm.skl_hw.mem_attr, sizeof(mem_attr));
                                                           ^

vim +4118 drivers/gpu/drm/i915/intel_pm.c

  4102			mem_attr->mem_wa = WATERMARK_WA_NONE;
  4103			return 0;
  4104		}
  4105	
  4106		if (!memdev_info->valid)
  4107			goto exit;
  4108	
  4109		/*
  4110		 * We hold wm_mutex lock, so any other flip can't proceed beyond WM
  4111		 * calculation step until this flip writes modified WM values.
  4112		 * So it's safe to read plane_state of other pipes without taking CRTC lock
  4113		 */
  4114		ret = drm_modeset_lock(&dev_priv->wm.wm_ww_mutex, state->acquire_ctx);
  4115		if (ret)
  4116			return ret;
  4117	
> 4118		memcpy(mem_attr, &dev_priv->wm.skl_hw.mem_attr, sizeof(mem_attr));
  4119	
  4120		for_each_crtc_in_state(state, crtc, cstate, i) {
  4121			struct drm_plane *plane;
  4122			const struct drm_plane_state *pstate;
  4123			int active_planes = 0;
  4124			uint32_t max_plane_bw_kbps = 0;
  4125	
  4126			mem_attr->pipe_y_tiled[i] = false;

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
kernel test robot Jan. 31, 2017, 6:58 p.m. UTC | #2
Hi Mahesh,

[auto build test WARNING on drm-intel/for-linux-next]
[also build test WARNING on next-20170130]
[cannot apply to v4.10-rc6]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Mahesh-Kumar/Enable-IPC-WM-related-WA-s/20170131-230708
base:   git://anongit.freedesktop.org/drm-intel for-linux-next
config: i386-allmodconfig (attached as .config)
compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901
reproduce:
        # save the attached .config to linux build tree
        make ARCH=i386 

All warnings (new ones prefixed by >>):

   In file included from arch/x86/include/asm/string.h:2:0,
                    from include/linux/string.h:18,
                    from arch/x86/include/asm/page_32.h:34,
                    from arch/x86/include/asm/page.h:13,
                    from arch/x86/include/asm/processor.h:17,
                    from include/linux/mutex.h:19,
                    from include/linux/notifier.h:13,
                    from include/linux/clk.h:17,
                    from include/linux/cpufreq.h:14,
                    from drivers/gpu/drm/i915/intel_pm.c:28:
   drivers/gpu/drm/i915/intel_pm.c: In function 'skl_compute_memory_bandwidth_wm_wa':
>> drivers/gpu/drm/i915/intel_pm.c:4118:56: warning: argument to 'sizeof' in '__builtin_memcpy' call is the same expression as the destination; did you mean to dereference it? [-Wsizeof-pointer-memaccess]
     memcpy(mem_attr, &dev_priv->wm.skl_hw.mem_attr, sizeof(mem_attr));
                                                           ^
   arch/x86/include/asm/string_32.h:182:48: note: in definition of macro 'memcpy'
    #define memcpy(t, f, n) __builtin_memcpy(t, f, n)
                                                   ^

vim +4118 drivers/gpu/drm/i915/intel_pm.c

  4102			mem_attr->mem_wa = WATERMARK_WA_NONE;
  4103			return 0;
  4104		}
  4105	
  4106		if (!memdev_info->valid)
  4107			goto exit;
  4108	
  4109		/*
  4110		 * We hold wm_mutex lock, so any other flip can't proceed beyond WM
  4111		 * calculation step until this flip writes modified WM values.
  4112		 * So it's safe to read plane_state of other pipes without taking CRTC lock
  4113		 */
  4114		ret = drm_modeset_lock(&dev_priv->wm.wm_ww_mutex, state->acquire_ctx);
  4115		if (ret)
  4116			return ret;
  4117	
> 4118		memcpy(mem_attr, &dev_priv->wm.skl_hw.mem_attr, sizeof(mem_attr));
  4119	
  4120		for_each_crtc_in_state(state, crtc, cstate, i) {
  4121			struct drm_plane *plane;
  4122			const struct drm_plane_state *pstate;
  4123			int active_planes = 0;
  4124			uint32_t max_plane_bw_kbps = 0;
  4125	
  4126			mem_attr->pipe_y_tiled[i] = false;

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
Maarten Lankhorst Feb. 1, 2017, 9:55 a.m. UTC | #3
Hey,

Op 31-01-17 om 15:57 schreef Mahesh Kumar:
> This patch enables workarounds related to display arbitrated memory
> bandwidth only if it's necessary. WA's are applicable for all GEN9
> based platforms.
>
> Changes since v1:
>  - Rebase on top of Paulo's patch series
> Changes since v2:
>  - Address review comments
>  - Rebase/rework as per other patch changes in series
> Changes since v3:
>  - Rebase the patch
>  - introduce ww_mutex to protect WM operations
>  - Protect system memory bandwidth calculation with ww_mutex
>
> Signed-off-by: Mahesh Kumar <mahesh1.kumar@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.c      |   1 +
>  drivers/gpu/drm/i915/i915_drv.h      |  15 ++++
>  drivers/gpu/drm/i915/intel_display.c |  34 ++++++++
>  drivers/gpu/drm/i915/intel_pm.c      | 163 +++++++++++++++++++++++++++++++----
>  4 files changed, 194 insertions(+), 19 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index b8d0dd2811a9..a14b2a9d2e6a 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -814,6 +814,7 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv,
>  	mutex_init(&dev_priv->modeset_restore_lock);
>  	mutex_init(&dev_priv->av_mutex);
>  	mutex_init(&dev_priv->wm.wm_mutex);
> +	drm_modeset_lock_init(&dev_priv->wm.wm_ww_mutex);
>  	mutex_init(&dev_priv->pps_mutex);
This is not the right approach wrt serialization. In case of a nonblocking update you need to grab the other crtc states so the update is serialized correctly.

Else you will get races with nonblocking modesets.
Please just grab all other crtc mutexes to update. In fact when changing the workaround you probably need to grab the crtc state as well to make sure all previous updates complete. I think in the last iteration I told you what worked, can't remember for sure.

Lastly this won't work as intended, we need to clean up all callers of lock_all_ctx first.
>  	intel_uc_init_early(dev_priv);
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index df24de2b65f2..6e5cdd6c9dfd 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1174,6 +1174,13 @@ enum intel_sbi_destination {
>  	SBI_MPHY,
>  };
>  
> +/* SKL+ Watermark arbitrated display bandwidth Workarounds */
> +enum watermark_memory_wa {
> +	WATERMARK_WA_NONE,
> +	WATERMARK_WA_X_TILED,
> +	WATERMARK_WA_Y_TILED,
> +};
> +
>  #define QUIRK_PIPEA_FORCE (1<<0)
>  #define QUIRK_LVDS_SSC_DISABLE (1<<1)
>  #define QUIRK_INVERT_BRIGHTNESS (1<<2)
> @@ -1744,8 +1751,15 @@ struct skl_ddb_allocation {
>  	struct skl_ddb_entry y_plane[I915_MAX_PIPES][I915_MAX_PLANES];
>  };
>  
> +struct skl_mem_bw_attr {
> +	enum watermark_memory_wa mem_wa;
> +	uint32_t pipe_bw_kbps[I915_MAX_PIPES];
> +	bool pipe_y_tiled[I915_MAX_PIPES];
> +};
> +
>  struct skl_wm_values {
>  	unsigned dirty_pipes;
> +	struct skl_mem_bw_attr mem_attr;
>  	struct skl_ddb_allocation ddb;
>  };
>  
> @@ -2348,6 +2362,7 @@ struct drm_i915_private {
>  		 * cstate->wm.need_postvbl_update.
>  		 */
>  		struct mutex wm_mutex;
> +		struct drm_modeset_lock wm_ww_mutex; /* protect DDB/WM redistribution among pipes */
>  
>  		/*
>  		 * Set during HW readout of watermarks/DDB.  Some platforms
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index 1aa708b6f55e..de512bd8136b 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -14589,6 +14589,38 @@ static void intel_atomic_track_fbs(struct drm_atomic_state *state)
>  				  to_intel_plane(plane)->frontbuffer_bit);
>  }
>  
> +static void
> +intel_update_wm_bw_wa(struct drm_atomic_state *state)
> +{
> +	struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
> +	struct drm_i915_private *dev_priv = to_i915(state->dev);
> +	const struct drm_crtc *crtc;
> +	const struct drm_crtc_state *cstate;
> +	const struct memdev_info *memdev_info = &dev_priv->memdev_info;
> +	struct skl_mem_bw_attr *results = &intel_state->wm_results.mem_attr;
> +	struct skl_mem_bw_attr *hw_vals = &dev_priv->wm.skl_hw.mem_attr;
> +	int i;
> +
> +	if (!IS_GEN9(dev_priv))
> +		return;
> +
> +	if (!memdev_info->valid)
> +		return;
> +
> +	for_each_crtc_in_state(state, crtc, cstate, i) {
> +		hw_vals->pipe_bw_kbps[i] = results->pipe_bw_kbps[i];
> +		hw_vals->pipe_y_tiled[i] = results->pipe_y_tiled[i];
> +	}
> +
> +	hw_vals->mem_wa = results->mem_wa;
> +
> +	/*
> +	 * As we already updated state variables no need to hold the lock,
> +	 * other commits can proceed now with their calculation
> +	 */
> +	drm_modeset_unlock(&dev_priv->wm.wm_ww_mutex);
> +}
Ugh, please don't do this. Ever. Locking is the caller's responsibility
>  /**
>   * intel_atomic_commit - commit validated state object
>   * @dev: DRM device
> @@ -14629,6 +14661,8 @@ static int intel_atomic_commit(struct drm_device *dev,
>  	intel_shared_dpll_swap_state(state);
>  	intel_atomic_track_fbs(state);
>  
> +	intel_update_wm_bw_wa(state);
> +
>  	if (intel_state->modeset) {
>  		memcpy(dev_priv->min_pixclk, intel_state->min_pixclk,
>  		       sizeof(intel_state->min_pixclk));
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index 16e83efa1118..ae43df5cdfd8 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -2887,21 +2887,6 @@ bool ilk_disable_lp_wm(struct drm_device *dev)
>  
>  #define SKL_SAGV_BLOCK_TIME	30 /* µs */
>  
> -/*
> - * FIXME: We still don't have the proper code detect if we need to apply the WA,
> - * so assume we'll always need it in order to avoid underruns.
> - */
> -static bool skl_needs_memory_bw_wa(struct intel_atomic_state *state)
> -{
> -	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
> -
> -	if (IS_SKYLAKE(dev_priv) || IS_BROXTON(dev_priv) ||
> -	    IS_KABYLAKE(dev_priv))
> -		return true;
> -
> -	return false;
> -}
> -
>  static bool
>  intel_has_sagv(struct drm_i915_private *dev_priv)
>  {
> @@ -3049,7 +3034,8 @@ bool intel_can_enable_sagv(struct drm_atomic_state *state)
>  
>  		latency = dev_priv->wm.skl_latency[level];
>  
> -		if (skl_needs_memory_bw_wa(intel_state) &&
> +		if (intel_state->wm_results.mem_attr.mem_wa !=
> +		    WATERMARK_WA_NONE &&
>  		    plane->base.state->fb->modifier ==
>  		    I915_FORMAT_MOD_X_TILED)
>  			latency += 15;
> @@ -3581,7 +3567,7 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
>  	uint32_t y_min_scanlines;
>  	struct intel_atomic_state *state =
>  		to_intel_atomic_state(cstate->base.state);
> -	bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state);
> +	enum watermark_memory_wa mem_wa;
>  	bool y_tiled, x_tiled;
>  
>  	if (latency == 0 || !cstate->base.active || !intel_pstate->base.visible) {
> @@ -3597,7 +3583,8 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
>  	if (IS_KABYLAKE(dev_priv) && dev_priv->ipc_enabled)
>  		latency += 4;
>  
> -	if (apply_memory_bw_wa && x_tiled)
> +	mem_wa = state->wm_results.mem_attr.mem_wa;
> +	if (mem_wa != WATERMARK_WA_NONE && x_tiled)
>  		latency += 15;
>  
>  	width = drm_rect_width(&intel_pstate->base.src) >> 16;
> @@ -3632,7 +3619,7 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
>  		y_min_scanlines = 4;
>  	}
>  
> -	if (apply_memory_bw_wa)
> +	if (mem_wa == WATERMARK_WA_Y_TILED)
>  		y_min_scanlines *= 2;
>  
>  	plane_bytes_per_line = width * cpp;
> @@ -4061,6 +4048,16 @@ skl_compute_ddb(struct drm_atomic_state *state)
>  	}
>  
>  	/*
> +	 * If Watermark workaround is changed we need to recalculate
> +	 * watermark values for all active pipes
> +	 */
> +	if (intel_state->wm_results.mem_attr.mem_wa !=
> +				dev_priv->wm.skl_hw.mem_attr.mem_wa) {
> +		realloc_pipes = ~0;
> +		intel_state->wm_results.dirty_pipes = ~0;
> +	}
> +
> +	/*
>  	 * We're not recomputing for the pipes not included in the commit, so
>  	 * make sure we start with the current state.
>  	 */
> @@ -4085,6 +4082,130 @@ skl_compute_ddb(struct drm_atomic_state *state)
>  	return 0;
>  }
>  
> +static int
> +skl_compute_memory_bandwidth_wm_wa(struct drm_atomic_state *state)
> +{
> +	struct drm_device *dev = state->dev;
> +	struct drm_i915_private *dev_priv = to_i915(dev);
> +	struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
> +	struct memdev_info *memdev_info = &dev_priv->memdev_info;
> +	struct skl_mem_bw_attr *mem_attr = &intel_state->wm_results.mem_attr;
> +	struct drm_crtc *crtc;
> +	struct drm_crtc_state *cstate;
> +	int active_pipes = 0;
> +	uint32_t max_pipe_bw_kbps = 0, total_pipe_bw_kbps;
> +	int display_bw_percentage;
> +	bool y_tile_enabled = false;
> +	int ret, i;
> +
> +	if (!IS_GEN9(dev_priv)) {
> +		mem_attr->mem_wa = WATERMARK_WA_NONE;
> +		return 0;
> +	}
> +
> +	if (!memdev_info->valid)
> +		goto exit;
> +
> +	/*
> +	 * We hold wm_mutex lock, so any other flip can't proceed beyond WM
> +	 * calculation step until this flip writes modified WM values.
> +	 * So it's safe to read plane_state of other pipes without taking CRTC lock
> +	 */
> +	ret = drm_modeset_lock(&dev_priv->wm.wm_ww_mutex, state->acquire_ctx);
> +	if (ret)
> +		return ret;
> +
> +	memcpy(mem_attr, &dev_priv->wm.skl_hw.mem_attr, sizeof(mem_attr));
> +
> +	for_each_crtc_in_state(state, crtc, cstate, i) {
> +		struct drm_plane *plane;
> +		const struct drm_plane_state *pstate;
> +		int active_planes = 0;
> +		uint32_t max_plane_bw_kbps = 0;
> +
> +		mem_attr->pipe_y_tiled[i] = false;
> +
> +		if (!cstate->active) {
> +			mem_attr->pipe_bw_kbps[i] = 0;
> +			continue;
> +		}
> +
> +		drm_atomic_crtc_state_for_each_plane_state(plane, pstate,
> +								cstate) {
> +			struct drm_framebuffer *fb;
> +			uint32_t plane_bw_kbps;
> +			enum plane_id id = to_intel_plane(plane)->id;
> +
> +			if (!pstate->visible)
> +				continue;
> +
> +			if (WARN_ON(!pstate->fb))
> +				continue;
> +
> +			if (id == PLANE_CURSOR)
> +				continue;
> +
> +			fb = pstate->fb;
> +			if ((fb->modifier == I915_FORMAT_MOD_Y_TILED ||
> +				fb->modifier == I915_FORMAT_MOD_Yf_TILED))
> +				mem_attr->pipe_y_tiled[i] = true;
> +
> +			plane_bw_kbps = skl_adjusted_plane_pixel_rate(
> +						to_intel_crtc_state(cstate),
> +						to_intel_plane_state(pstate));
> +			max_plane_bw_kbps = max(plane_bw_kbps,
> +							max_plane_bw_kbps);
> +			active_planes++;
> +		}
> +		mem_attr->pipe_bw_kbps[i] = max_plane_bw_kbps * active_planes;
> +	}
> +
> +	for_each_pipe(dev_priv, i) {
> +		if (mem_attr->pipe_bw_kbps[i]) {
> +			max_pipe_bw_kbps = max(max_pipe_bw_kbps,
> +					mem_attr->pipe_bw_kbps[i]);
> +			active_pipes++;
> +		}
> +		if (mem_attr->pipe_y_tiled[i])
> +			y_tile_enabled = true;
> +	}
> +
> +	total_pipe_bw_kbps = max_pipe_bw_kbps * active_pipes;
> +	display_bw_percentage = DIV_ROUND_UP_ULL((uint64_t)total_pipe_bw_kbps *
> +					100, memdev_info->bandwidth_kbps);
> +
> +	/*
> +	 * If there is any Ytile plane enabled and arbitrated display
> +	 * bandwidth > 20% of raw system memory bandwidth
> +	 * Enale Y-tile related WA
> +	 *
> +	 * If memory is dual channel single rank, Xtile limit = 35%, else Xtile
> +	 * limit = 60%
> +	 * If there is no Ytile plane enabled and
> +	 * arbitrated display bandwidth > Xtile limit
> +	 * Enable X-tile realated WA
> +	 */
> +	if (y_tile_enabled && (display_bw_percentage > 20))
> +		mem_attr->mem_wa = WATERMARK_WA_Y_TILED;
> +	else {
> +		int x_tile_percentage = 60;
> +		enum rank rank = memdev_info->rank;
> +
> +		if ((rank == I915_DRAM_RANK_SINGLE) &&
> +					(memdev_info->num_channels == 2))
> +			x_tile_percentage = 35;
> +
> +		if (display_bw_percentage > x_tile_percentage)
> +			mem_attr->mem_wa = WATERMARK_WA_X_TILED;
> +	}
> +
> +	return 0;
> +
> +exit:
> +	mem_attr->mem_wa = WATERMARK_WA_Y_TILED;
> +	return 0;
> +}
> +
>  static void
>  skl_copy_wm_for_pipe(struct skl_wm_values *dst,
>  		     struct skl_wm_values *src,
> @@ -4160,6 +4281,10 @@ skl_compute_wm(struct drm_atomic_state *state)
>  	/* Clear all dirty flags */
>  	results->dirty_pipes = 0;
>  
> +	ret = skl_compute_memory_bandwidth_wm_wa(state);
> +	if (ret)
> +		return ret;
> +
>  	ret = skl_compute_ddb(state);
>  	if (ret)
>  		return ret;
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index b8d0dd2811a9..a14b2a9d2e6a 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -814,6 +814,7 @@  static int i915_driver_init_early(struct drm_i915_private *dev_priv,
 	mutex_init(&dev_priv->modeset_restore_lock);
 	mutex_init(&dev_priv->av_mutex);
 	mutex_init(&dev_priv->wm.wm_mutex);
+	drm_modeset_lock_init(&dev_priv->wm.wm_ww_mutex);
 	mutex_init(&dev_priv->pps_mutex);
 
 	intel_uc_init_early(dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index df24de2b65f2..6e5cdd6c9dfd 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1174,6 +1174,13 @@  enum intel_sbi_destination {
 	SBI_MPHY,
 };
 
+/* SKL+ Watermark arbitrated display bandwidth Workarounds */
+enum watermark_memory_wa {
+	WATERMARK_WA_NONE,
+	WATERMARK_WA_X_TILED,
+	WATERMARK_WA_Y_TILED,
+};
+
 #define QUIRK_PIPEA_FORCE (1<<0)
 #define QUIRK_LVDS_SSC_DISABLE (1<<1)
 #define QUIRK_INVERT_BRIGHTNESS (1<<2)
@@ -1744,8 +1751,15 @@  struct skl_ddb_allocation {
 	struct skl_ddb_entry y_plane[I915_MAX_PIPES][I915_MAX_PLANES];
 };
 
+struct skl_mem_bw_attr {
+	enum watermark_memory_wa mem_wa;
+	uint32_t pipe_bw_kbps[I915_MAX_PIPES];
+	bool pipe_y_tiled[I915_MAX_PIPES];
+};
+
 struct skl_wm_values {
 	unsigned dirty_pipes;
+	struct skl_mem_bw_attr mem_attr;
 	struct skl_ddb_allocation ddb;
 };
 
@@ -2348,6 +2362,7 @@  struct drm_i915_private {
 		 * cstate->wm.need_postvbl_update.
 		 */
 		struct mutex wm_mutex;
+		struct drm_modeset_lock wm_ww_mutex; /* protect DDB/WM redistribution among pipes */
 
 		/*
 		 * Set during HW readout of watermarks/DDB.  Some platforms
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 1aa708b6f55e..de512bd8136b 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -14589,6 +14589,38 @@  static void intel_atomic_track_fbs(struct drm_atomic_state *state)
 				  to_intel_plane(plane)->frontbuffer_bit);
 }
 
+static void
+intel_update_wm_bw_wa(struct drm_atomic_state *state)
+{
+	struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
+	struct drm_i915_private *dev_priv = to_i915(state->dev);
+	const struct drm_crtc *crtc;
+	const struct drm_crtc_state *cstate;
+	const struct memdev_info *memdev_info = &dev_priv->memdev_info;
+	struct skl_mem_bw_attr *results = &intel_state->wm_results.mem_attr;
+	struct skl_mem_bw_attr *hw_vals = &dev_priv->wm.skl_hw.mem_attr;
+	int i;
+
+	if (!IS_GEN9(dev_priv))
+		return;
+
+	if (!memdev_info->valid)
+		return;
+
+	for_each_crtc_in_state(state, crtc, cstate, i) {
+		hw_vals->pipe_bw_kbps[i] = results->pipe_bw_kbps[i];
+		hw_vals->pipe_y_tiled[i] = results->pipe_y_tiled[i];
+	}
+
+	hw_vals->mem_wa = results->mem_wa;
+
+	/*
+	 * As we already updated state variables no need to hold the lock,
+	 * other commits can proceed now with their calculation
+	 */
+	drm_modeset_unlock(&dev_priv->wm.wm_ww_mutex);
+}
+
 /**
  * intel_atomic_commit - commit validated state object
  * @dev: DRM device
@@ -14629,6 +14661,8 @@  static int intel_atomic_commit(struct drm_device *dev,
 	intel_shared_dpll_swap_state(state);
 	intel_atomic_track_fbs(state);
 
+	intel_update_wm_bw_wa(state);
+
 	if (intel_state->modeset) {
 		memcpy(dev_priv->min_pixclk, intel_state->min_pixclk,
 		       sizeof(intel_state->min_pixclk));
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 16e83efa1118..ae43df5cdfd8 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -2887,21 +2887,6 @@  bool ilk_disable_lp_wm(struct drm_device *dev)
 
 #define SKL_SAGV_BLOCK_TIME	30 /* µs */
 
-/*
- * FIXME: We still don't have the proper code detect if we need to apply the WA,
- * so assume we'll always need it in order to avoid underruns.
- */
-static bool skl_needs_memory_bw_wa(struct intel_atomic_state *state)
-{
-	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
-
-	if (IS_SKYLAKE(dev_priv) || IS_BROXTON(dev_priv) ||
-	    IS_KABYLAKE(dev_priv))
-		return true;
-
-	return false;
-}
-
 static bool
 intel_has_sagv(struct drm_i915_private *dev_priv)
 {
@@ -3049,7 +3034,8 @@  bool intel_can_enable_sagv(struct drm_atomic_state *state)
 
 		latency = dev_priv->wm.skl_latency[level];
 
-		if (skl_needs_memory_bw_wa(intel_state) &&
+		if (intel_state->wm_results.mem_attr.mem_wa !=
+		    WATERMARK_WA_NONE &&
 		    plane->base.state->fb->modifier ==
 		    I915_FORMAT_MOD_X_TILED)
 			latency += 15;
@@ -3581,7 +3567,7 @@  static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
 	uint32_t y_min_scanlines;
 	struct intel_atomic_state *state =
 		to_intel_atomic_state(cstate->base.state);
-	bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state);
+	enum watermark_memory_wa mem_wa;
 	bool y_tiled, x_tiled;
 
 	if (latency == 0 || !cstate->base.active || !intel_pstate->base.visible) {
@@ -3597,7 +3583,8 @@  static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
 	if (IS_KABYLAKE(dev_priv) && dev_priv->ipc_enabled)
 		latency += 4;
 
-	if (apply_memory_bw_wa && x_tiled)
+	mem_wa = state->wm_results.mem_attr.mem_wa;
+	if (mem_wa != WATERMARK_WA_NONE && x_tiled)
 		latency += 15;
 
 	width = drm_rect_width(&intel_pstate->base.src) >> 16;
@@ -3632,7 +3619,7 @@  static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
 		y_min_scanlines = 4;
 	}
 
-	if (apply_memory_bw_wa)
+	if (mem_wa == WATERMARK_WA_Y_TILED)
 		y_min_scanlines *= 2;
 
 	plane_bytes_per_line = width * cpp;
@@ -4061,6 +4048,16 @@  skl_compute_ddb(struct drm_atomic_state *state)
 	}
 
 	/*
+	 * If Watermark workaround is changed we need to recalculate
+	 * watermark values for all active pipes
+	 */
+	if (intel_state->wm_results.mem_attr.mem_wa !=
+				dev_priv->wm.skl_hw.mem_attr.mem_wa) {
+		realloc_pipes = ~0;
+		intel_state->wm_results.dirty_pipes = ~0;
+	}
+
+	/*
 	 * We're not recomputing for the pipes not included in the commit, so
 	 * make sure we start with the current state.
 	 */
@@ -4085,6 +4082,130 @@  skl_compute_ddb(struct drm_atomic_state *state)
 	return 0;
 }
 
+static int
+skl_compute_memory_bandwidth_wm_wa(struct drm_atomic_state *state)
+{
+	struct drm_device *dev = state->dev;
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
+	struct memdev_info *memdev_info = &dev_priv->memdev_info;
+	struct skl_mem_bw_attr *mem_attr = &intel_state->wm_results.mem_attr;
+	struct drm_crtc *crtc;
+	struct drm_crtc_state *cstate;
+	int active_pipes = 0;
+	uint32_t max_pipe_bw_kbps = 0, total_pipe_bw_kbps;
+	int display_bw_percentage;
+	bool y_tile_enabled = false;
+	int ret, i;
+
+	if (!IS_GEN9(dev_priv)) {
+		mem_attr->mem_wa = WATERMARK_WA_NONE;
+		return 0;
+	}
+
+	if (!memdev_info->valid)
+		goto exit;
+
+	/*
+	 * We hold wm_mutex lock, so any other flip can't proceed beyond WM
+	 * calculation step until this flip writes modified WM values.
+	 * So it's safe to read plane_state of other pipes without taking CRTC lock
+	 */
+	ret = drm_modeset_lock(&dev_priv->wm.wm_ww_mutex, state->acquire_ctx);
+	if (ret)
+		return ret;
+
+	memcpy(mem_attr, &dev_priv->wm.skl_hw.mem_attr, sizeof(mem_attr));
+
+	for_each_crtc_in_state(state, crtc, cstate, i) {
+		struct drm_plane *plane;
+		const struct drm_plane_state *pstate;
+		int active_planes = 0;
+		uint32_t max_plane_bw_kbps = 0;
+
+		mem_attr->pipe_y_tiled[i] = false;
+
+		if (!cstate->active) {
+			mem_attr->pipe_bw_kbps[i] = 0;
+			continue;
+		}
+
+		drm_atomic_crtc_state_for_each_plane_state(plane, pstate,
+								cstate) {
+			struct drm_framebuffer *fb;
+			uint32_t plane_bw_kbps;
+			enum plane_id id = to_intel_plane(plane)->id;
+
+			if (!pstate->visible)
+				continue;
+
+			if (WARN_ON(!pstate->fb))
+				continue;
+
+			if (id == PLANE_CURSOR)
+				continue;
+
+			fb = pstate->fb;
+			if ((fb->modifier == I915_FORMAT_MOD_Y_TILED ||
+				fb->modifier == I915_FORMAT_MOD_Yf_TILED))
+				mem_attr->pipe_y_tiled[i] = true;
+
+			plane_bw_kbps = skl_adjusted_plane_pixel_rate(
+						to_intel_crtc_state(cstate),
+						to_intel_plane_state(pstate));
+			max_plane_bw_kbps = max(plane_bw_kbps,
+							max_plane_bw_kbps);
+			active_planes++;
+		}
+		mem_attr->pipe_bw_kbps[i] = max_plane_bw_kbps * active_planes;
+	}
+
+	for_each_pipe(dev_priv, i) {
+		if (mem_attr->pipe_bw_kbps[i]) {
+			max_pipe_bw_kbps = max(max_pipe_bw_kbps,
+					mem_attr->pipe_bw_kbps[i]);
+			active_pipes++;
+		}
+		if (mem_attr->pipe_y_tiled[i])
+			y_tile_enabled = true;
+	}
+
+	total_pipe_bw_kbps = max_pipe_bw_kbps * active_pipes;
+	display_bw_percentage = DIV_ROUND_UP_ULL((uint64_t)total_pipe_bw_kbps *
+					100, memdev_info->bandwidth_kbps);
+
+	/*
+	 * If there is any Ytile plane enabled and arbitrated display
+	 * bandwidth > 20% of raw system memory bandwidth
+	 * Enale Y-tile related WA
+	 *
+	 * If memory is dual channel single rank, Xtile limit = 35%, else Xtile
+	 * limit = 60%
+	 * If there is no Ytile plane enabled and
+	 * arbitrated display bandwidth > Xtile limit
+	 * Enable X-tile realated WA
+	 */
+	if (y_tile_enabled && (display_bw_percentage > 20))
+		mem_attr->mem_wa = WATERMARK_WA_Y_TILED;
+	else {
+		int x_tile_percentage = 60;
+		enum rank rank = memdev_info->rank;
+
+		if ((rank == I915_DRAM_RANK_SINGLE) &&
+					(memdev_info->num_channels == 2))
+			x_tile_percentage = 35;
+
+		if (display_bw_percentage > x_tile_percentage)
+			mem_attr->mem_wa = WATERMARK_WA_X_TILED;
+	}
+
+	return 0;
+
+exit:
+	mem_attr->mem_wa = WATERMARK_WA_Y_TILED;
+	return 0;
+}
+
 static void
 skl_copy_wm_for_pipe(struct skl_wm_values *dst,
 		     struct skl_wm_values *src,
@@ -4160,6 +4281,10 @@  skl_compute_wm(struct drm_atomic_state *state)
 	/* Clear all dirty flags */
 	results->dirty_pipes = 0;
 
+	ret = skl_compute_memory_bandwidth_wm_wa(state);
+	if (ret)
+		return ret;
+
 	ret = skl_compute_ddb(state);
 	if (ret)
 		return ret;