Message ID | 1490818152-10891-3-git-send-email-mario.kleiner.de@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Wed, Mar 29, 2017 at 4:09 PM, Mario Kleiner <mario.kleiner.de@gmail.com> wrote: > At dot clocks > approx. 250 Mhz, some of these calcs will overflow and > cause miscalculation of latency watermarks, and for some overflows also > divide-by-zero driver crash ("divide error: 0000 [#1] PREEMPT SMP" in > "dce_v10_0_latency_watermark+0x12d/0x190"). > > This zero-divide happened, e.g., on AMD Tonga Pro under DCE-10, > on a Displayport panel when trying to set a video mode of 2560x1440 > at 165 Hz vrefresh with a dot clock of 635.540 Mhz. > > Refine calculations to avoid the overflows. > > Tested for DCE-10 with R9 380 Tonga + ASUS ROG PG279 panel. > > Signed-off-by: Mario Kleiner <mario.kleiner.de@gmail.com> Applied the series. thanks! Alex > --- > drivers/gpu/drm/amd/amdgpu/dce_v10_0.c | 19 +++---------------- > drivers/gpu/drm/amd/amdgpu/dce_v11_0.c | 19 +++---------------- > drivers/gpu/drm/amd/amdgpu/dce_v6_0.c | 19 +++---------------- > drivers/gpu/drm/amd/amdgpu/dce_v8_0.c | 19 +++---------------- > 4 files changed, 12 insertions(+), 64 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c > index d3db921..33541ac 100644 > --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c > @@ -1090,23 +1090,10 @@ static u32 dce_v10_0_latency_watermark(struct dce10_wm_params *wm) > a.full = dfixed_const(available_bandwidth); > b.full = dfixed_const(wm->num_heads); > a.full = dfixed_div(a, b); > + tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512); > + tmp = min(dfixed_trunc(a), tmp); > > - b.full = dfixed_const(mc_latency + 512); > - c.full = dfixed_const(wm->disp_clk); > - b.full = dfixed_div(b, c); > - > - c.full = dfixed_const(dmif_size); > - b.full = dfixed_div(c, b); > - > - tmp = min(dfixed_trunc(a), dfixed_trunc(b)); > - > - b.full = dfixed_const(1000); > - c.full = dfixed_const(wm->disp_clk); > - b.full = dfixed_div(c, b); > - c.full = 
dfixed_const(wm->bytes_per_pixel); > - b.full = dfixed_mul(b, c); > - > - lb_fill_bw = min(tmp, dfixed_trunc(b)); > + lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000); > > a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel); > b.full = dfixed_const(1000); > diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c > index 15ee8eb..1388f8a 100644 > --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c > @@ -1059,23 +1059,10 @@ static u32 dce_v11_0_latency_watermark(struct dce10_wm_params *wm) > a.full = dfixed_const(available_bandwidth); > b.full = dfixed_const(wm->num_heads); > a.full = dfixed_div(a, b); > + tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512); > + tmp = min(dfixed_trunc(a), tmp); > > - b.full = dfixed_const(mc_latency + 512); > - c.full = dfixed_const(wm->disp_clk); > - b.full = dfixed_div(b, c); > - > - c.full = dfixed_const(dmif_size); > - b.full = dfixed_div(c, b); > - > - tmp = min(dfixed_trunc(a), dfixed_trunc(b)); > - > - b.full = dfixed_const(1000); > - c.full = dfixed_const(wm->disp_clk); > - b.full = dfixed_div(c, b); > - c.full = dfixed_const(wm->bytes_per_pixel); > - b.full = dfixed_mul(b, c); > - > - lb_fill_bw = min(tmp, dfixed_trunc(b)); > + lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000); > > a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel); > b.full = dfixed_const(1000); > diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c > index cb9158b..bad52c0 100644 > --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c > @@ -861,23 +861,10 @@ static u32 dce_v6_0_latency_watermark(struct dce6_wm_params *wm) > a.full = dfixed_const(available_bandwidth); > b.full = dfixed_const(wm->num_heads); > a.full = dfixed_div(a, b); > + tmp = div_u64((u64) dmif_size * (u64) 
wm->disp_clk, mc_latency + 512); > + tmp = min(dfixed_trunc(a), tmp); > > - b.full = dfixed_const(mc_latency + 512); > - c.full = dfixed_const(wm->disp_clk); > - b.full = dfixed_div(b, c); > - > - c.full = dfixed_const(dmif_size); > - b.full = dfixed_div(c, b); > - > - tmp = min(dfixed_trunc(a), dfixed_trunc(b)); > - > - b.full = dfixed_const(1000); > - c.full = dfixed_const(wm->disp_clk); > - b.full = dfixed_div(c, b); > - c.full = dfixed_const(wm->bytes_per_pixel); > - b.full = dfixed_mul(b, c); > - > - lb_fill_bw = min(tmp, dfixed_trunc(b)); > + lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000); > > a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel); > b.full = dfixed_const(1000); > diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c > index d547bcf..e52fc92 100644 > --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c > @@ -974,23 +974,10 @@ static u32 dce_v8_0_latency_watermark(struct dce8_wm_params *wm) > a.full = dfixed_const(available_bandwidth); > b.full = dfixed_const(wm->num_heads); > a.full = dfixed_div(a, b); > + tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512); > + tmp = min(dfixed_trunc(a), tmp); > > - b.full = dfixed_const(mc_latency + 512); > - c.full = dfixed_const(wm->disp_clk); > - b.full = dfixed_div(b, c); > - > - c.full = dfixed_const(dmif_size); > - b.full = dfixed_div(c, b); > - > - tmp = min(dfixed_trunc(a), dfixed_trunc(b)); > - > - b.full = dfixed_const(1000); > - c.full = dfixed_const(wm->disp_clk); > - b.full = dfixed_div(c, b); > - c.full = dfixed_const(wm->bytes_per_pixel); > - b.full = dfixed_mul(b, c); > - > - lb_fill_bw = min(tmp, dfixed_trunc(b)); > + lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000); > > a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel); > b.full = dfixed_const(1000); > -- > 2.7.4 > > 
_______________________________________________ > amd-gfx mailing list > amd-gfx@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/amd-gfx
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index d3db921..33541ac 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -1090,23 +1090,10 @@ static u32 dce_v10_0_latency_watermark(struct dce10_wm_params *wm) a.full = dfixed_const(available_bandwidth); b.full = dfixed_const(wm->num_heads); a.full = dfixed_div(a, b); + tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512); + tmp = min(dfixed_trunc(a), tmp); - b.full = dfixed_const(mc_latency + 512); - c.full = dfixed_const(wm->disp_clk); - b.full = dfixed_div(b, c); - - c.full = dfixed_const(dmif_size); - b.full = dfixed_div(c, b); - - tmp = min(dfixed_trunc(a), dfixed_trunc(b)); - - b.full = dfixed_const(1000); - c.full = dfixed_const(wm->disp_clk); - b.full = dfixed_div(c, b); - c.full = dfixed_const(wm->bytes_per_pixel); - b.full = dfixed_mul(b, c); - - lb_fill_bw = min(tmp, dfixed_trunc(b)); + lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000); a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel); b.full = dfixed_const(1000); diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index 15ee8eb..1388f8a 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -1059,23 +1059,10 @@ static u32 dce_v11_0_latency_watermark(struct dce10_wm_params *wm) a.full = dfixed_const(available_bandwidth); b.full = dfixed_const(wm->num_heads); a.full = dfixed_div(a, b); + tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512); + tmp = min(dfixed_trunc(a), tmp); - b.full = dfixed_const(mc_latency + 512); - c.full = dfixed_const(wm->disp_clk); - b.full = dfixed_div(b, c); - - c.full = dfixed_const(dmif_size); - b.full = dfixed_div(c, b); - - tmp = min(dfixed_trunc(a), dfixed_trunc(b)); - - b.full = dfixed_const(1000); - c.full = dfixed_const(wm->disp_clk); - b.full = dfixed_div(c, 
b); - c.full = dfixed_const(wm->bytes_per_pixel); - b.full = dfixed_mul(b, c); - - lb_fill_bw = min(tmp, dfixed_trunc(b)); + lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000); a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel); b.full = dfixed_const(1000); diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index cb9158b..bad52c0 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -861,23 +861,10 @@ static u32 dce_v6_0_latency_watermark(struct dce6_wm_params *wm) a.full = dfixed_const(available_bandwidth); b.full = dfixed_const(wm->num_heads); a.full = dfixed_div(a, b); + tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512); + tmp = min(dfixed_trunc(a), tmp); - b.full = dfixed_const(mc_latency + 512); - c.full = dfixed_const(wm->disp_clk); - b.full = dfixed_div(b, c); - - c.full = dfixed_const(dmif_size); - b.full = dfixed_div(c, b); - - tmp = min(dfixed_trunc(a), dfixed_trunc(b)); - - b.full = dfixed_const(1000); - c.full = dfixed_const(wm->disp_clk); - b.full = dfixed_div(c, b); - c.full = dfixed_const(wm->bytes_per_pixel); - b.full = dfixed_mul(b, c); - - lb_fill_bw = min(tmp, dfixed_trunc(b)); + lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000); a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel); b.full = dfixed_const(1000); diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index d547bcf..e52fc92 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -974,23 +974,10 @@ static u32 dce_v8_0_latency_watermark(struct dce8_wm_params *wm) a.full = dfixed_const(available_bandwidth); b.full = dfixed_const(wm->num_heads); a.full = dfixed_div(a, b); + tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512); + tmp = min(dfixed_trunc(a), tmp); - b.full = 
dfixed_const(mc_latency + 512); - c.full = dfixed_const(wm->disp_clk); - b.full = dfixed_div(b, c); - - c.full = dfixed_const(dmif_size); - b.full = dfixed_div(c, b); - - tmp = min(dfixed_trunc(a), dfixed_trunc(b)); - - b.full = dfixed_const(1000); - c.full = dfixed_const(wm->disp_clk); - b.full = dfixed_div(c, b); - c.full = dfixed_const(wm->bytes_per_pixel); - b.full = dfixed_mul(b, c); - - lb_fill_bw = min(tmp, dfixed_trunc(b)); + lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000); a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel); b.full = dfixed_const(1000);
At dot clocks > approx. 250 MHz, some of these calculations will overflow and cause miscalculation of latency watermarks, and for some overflows also a divide-by-zero driver crash ("divide error: 0000 [#1] PREEMPT SMP" in "dce_v10_0_latency_watermark+0x12d/0x190"). This zero-divide happened, e.g., on AMD Tonga Pro under DCE-10, on a DisplayPort panel when trying to set a video mode of 2560x1440 at 165 Hz vrefresh with a dot clock of 635.540 MHz. Refine calculations to avoid the overflows. Tested for DCE-10 with R9 380 Tonga + ASUS ROG PG279 panel. Signed-off-by: Mario Kleiner <mario.kleiner.de@gmail.com> --- drivers/gpu/drm/amd/amdgpu/dce_v10_0.c | 19 +++---------------- drivers/gpu/drm/amd/amdgpu/dce_v11_0.c | 19 +++---------------- drivers/gpu/drm/amd/amdgpu/dce_v6_0.c | 19 +++---------------- drivers/gpu/drm/amd/amdgpu/dce_v8_0.c | 19 +++---------------- 4 files changed, 12 insertions(+), 64 deletions(-)