Message ID: 20220319204229.9846-5-ramalingam.c@intel.com (mailing list archive)
State: New, archived
Series: drm/i915/ttm: Evict and restore of compressed object
Hi Ramalingam,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on drm-intel/for-linux-next]
[also build test ERROR on drm/drm-next drm-tip/drm-tip next-20220318]
[cannot apply to v5.17-rc8]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Ramalingam-C/drm-i915-ttm-Evict-and-restore-of-compressed-object/20220320-044242
base:   git://anongit.freedesktop.org/drm-intel for-linux-next
config: i386-allyesconfig (https://download.01.org/0day-ci/archive/20220320/202203200912.4mqFVTe9-lkp@intel.com/config)
compiler: gcc-9 (Ubuntu 9.4.0-1ubuntu1~20.04) 9.4.0
reproduce (this is a W=1 build):
        # https://github.com/0day-ci/linux/commit/afd58bdbf43437bf72ff2313776c3036ebf99a11
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Ramalingam-C/drm-i915-ttm-Evict-and-restore-of-compressed-object/20220320-044242
        git checkout afd58bdbf43437bf72ff2313776c3036ebf99a11
        # save the config file to linux build tree
        mkdir build_dir
        make W=1 O=build_dir ARCH=i386 SHELL=/bin/bash

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   In file included from include/linux/kernel.h:29,
                    from arch/x86/include/asm/percpu.h:27,
                    from arch/x86/include/asm/current.h:6,
                    from arch/x86/include/asm/processor.h:17,
                    from arch/x86/include/asm/kvm_para.h:5,
                    from arch/x86/include/asm/hypervisor.h:37,
                    from drivers/gpu/drm/i915/i915_drv.h:35,
                    from drivers/gpu/drm/i915/gt/intel_migrate.c:6:
   drivers/gpu/drm/i915/gt/selftest_migrate.c: In function 'clear':
>> include/linux/kern_levels.h:5:18: error: format '%lu' expects argument of type 'long unsigned int', but argument 4 has type 'unsigned int' [-Werror=format=]
       5 | #define KERN_SOH "\001"    /* ASCII Start Of Header */
         |                  ^~~~~~
   include/linux/printk.h:418:11: note: in definition of macro 'printk_index_wrap'
     418 |   _p_func(_fmt, ##__VA_ARGS__);    \
         |           ^~~~
   include/linux/printk.h:489:2: note: in expansion of macro 'printk'
     489 |  printk(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__)
         |  ^~~~~~
   include/linux/kern_levels.h:11:18: note: in expansion of macro 'KERN_SOH'
      11 | #define KERN_ERR KERN_SOH "3" /* error conditions */
         |                  ^~~~~~~~
   include/linux/printk.h:489:9: note: in expansion of macro 'KERN_ERR'
     489 |  printk(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__)
         |         ^~~~~~~~
   drivers/gpu/drm/i915/gt/selftest_migrate.c:403:6: note: in expansion of macro 'pr_err'
     403 |      pr_err("%ps ccs clearing failed, offset: %d/%lu\n",
         |      ^~~~~~
   In file included from drivers/gpu/drm/i915/gt/intel_migrate.c:1014:
   drivers/gpu/drm/i915/gt/selftest_migrate.c:403:52: note: format string is defined here
     403 |      pr_err("%ps ccs clearing failed, offset: %d/%lu\n",
         |                                                ~~^
         |                                                  |
         |                                                  long unsigned int
         |                                                  %u
   drivers/gpu/drm/i915/gt/intel_migrate.c: In function 'intel_context_copy_ccs':
>> drivers/gpu/drm/i915/gt/selftest_migrate.c:157:19: error: 'rq' is used uninitialized in this function [-Werror=uninitialized]
     157 |  offset += (u64)rq->engine->instance << 32;
         |                 ~~^~~~~~~~
   cc1: all warnings being treated as errors


vim +/rq +157 drivers/gpu/drm/i915/gt/selftest_migrate.c

   134	
   135	static int intel_context_copy_ccs(struct intel_context *ce,
   136					  const struct i915_deps *deps,
   137					  struct scatterlist *sg,
   138					  enum i915_cache_level cache_level,
   139					  bool write_to_ccs,
   140					  struct i915_request **out)
   141	{
   142		u8 src_access = write_to_ccs ? DIRECT_ACCESS : INDIRECT_ACCESS;
   143		u8 dst_access = write_to_ccs ? INDIRECT_ACCESS : DIRECT_ACCESS;
   144		struct sgt_dma it = sg_sgt(sg);
   145		struct i915_request *rq;
   146		u32 offset;
   147		int err;
   148	
   149		GEM_BUG_ON(ce->vm != ce->engine->gt->migrate.context->vm);
   150		*out = NULL;
   151	
   152		GEM_BUG_ON(ce->ring->size < SZ_64K);
   153	
   154		offset = 0;
   155		if (HAS_64K_PAGES(ce->engine->i915))
   156			offset = CHUNK_SZ;
 > 157		offset += (u64)rq->engine->instance << 32;
   158	
   159		do {
   160			int len;
   161	
   162			rq = i915_request_create(ce);
   163			if (IS_ERR(rq)) {
   164				err = PTR_ERR(rq);
   165				goto out_ce;
   166			}
   167	
   168			if (deps) {
   169				err = i915_request_await_deps(rq, deps);
   170				if (err)
   171					goto out_rq;
   172	
   173				if (rq->engine->emit_init_breadcrumb) {
   174					err = rq->engine->emit_init_breadcrumb(rq);
   175					if (err)
   176						goto out_rq;
   177				}
   178	
   179				deps = NULL;
   180			}
   181	
   182			/* The PTE updates + clear must not be interrupted. */
   183			err = emit_no_arbitration(rq);
   184			if (err)
   185				goto out_rq;
   186	
   187			len = emit_pte(rq, &it, cache_level, true, offset, CHUNK_SZ);
   188			if (len <= 0) {
   189				err = len;
   190				goto out_rq;
   191			}
   192	
   193			err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
   194			if (err)
   195				goto out_rq;
   196	
   197			err = emit_copy_ccs(rq, offset, dst_access,
   198					    offset, src_access, len);
   199			if (err)
   200				goto out_rq;
   201	
   202			err = rq->engine->emit_flush(rq, EMIT_INVALIDATE |
   203						     MI_FLUSH_DW_CCS);
   204	
   205			/* Arbitration is re-enabled between requests. */
   206	out_rq:
   207			if (*out)
   208				i915_request_put(*out);
   209			*out = i915_request_get(rq);
   210			i915_request_add(rq);
   211			if (err || !it.sg || !sg_dma_len(it.sg))
   212				break;
   213	
   214			cond_resched();
   215		} while (1);
   216	
   217	out_ce:
   218		return err;
   219	}
   220	
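[Editorial note: below is a minimal sketch of how the two reported errors could be addressed. It is an illustration against the posted code only, not the follow-up fix actually sent by the author.]

/*
 * Sketch only -- two candidate repairs for the robot-reported errors.
 *
 * 1) selftest_migrate.c:157 dereferences 'rq' before any request exists;
 *    the first i915_request_create() only happens inside the loop.  Note
 *    also that 'offset' is a u32, so adding a value whose low 32 bits are
 *    zero would not change it anyway.  The simplest repair is to drop the
 *    line, or defer it until after the request is created if the
 *    engine-instance offset is really wanted.
 */
	offset = 0;
	if (HAS_64K_PAGES(ce->engine->i915))
		offset = CHUNK_SZ;
	/* no rq-based adjustment here: rq has not been created yet */

/*
 * 2) selftest_migrate.c:403: 'ccs_bytes / sizeof(u32)' has type size_t
 *    (unsigned int on i386), so '%lu' mismatches there; '%zu' matches
 *    size_t on every architecture.
 */
	pr_err("%ps ccs clearing failed, offset: %d/%zu\n",
	       fn, i, (ccs_bytes / sizeof(u32)) - 1);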
On Sun, 2022-03-20 at 02:12 +0530, Ramalingam C wrote: > While clearing the Flat-CCS capable lmem object, we need to clear the > CCS > meta data corresponding to the memory. > > As part of live_migrate_clear add check for the ccs meta data clear > for > the Flat-CCS capable lmem object. > > Signed-off-by: Ramalingam C <ramalingam.c@intel.com> > --- > drivers/gpu/drm/i915/gt/intel_migrate.c | 32 +++ > drivers/gpu/drm/i915/gt/selftest_migrate.c | 274 ++++++++++++++++++- > -- > 2 files changed, 278 insertions(+), 28 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c > b/drivers/gpu/drm/i915/gt/intel_migrate.c > index c1db8daf994a..bbfea570c239 100644 > --- a/drivers/gpu/drm/i915/gt/intel_migrate.c > +++ b/drivers/gpu/drm/i915/gt/intel_migrate.c > @@ -572,6 +572,38 @@ static u32 *_i915_ctrl_surf_copy_blt(u32 *cmd, > u64 src_addr, u64 dst_addr, > return cmd; > } > > +static int emit_copy_ccs(struct i915_request *rq, > + u32 dst_offset, u8 dst_access, > + u32 src_offset, u8 src_access, int size) > +{ > + struct drm_i915_private *i915 = rq->engine->i915; > + int mocs = rq->engine->gt->mocs.uc_index << 1; > + u32 num_ccs_blks, ccs_ring_size; > + u32 *cs; > + > + ccs_ring_size = calc_ctrl_surf_instr_size(i915, size); > + WARN_ON(!ccs_ring_size); > + > + cs = intel_ring_begin(rq, round_up(ccs_ring_size, 2)); > + if (IS_ERR(cs)) > + return PTR_ERR(cs); > + > + num_ccs_blks = DIV_ROUND_UP(GET_CCS_BYTES(i915, size), > + NUM_CCS_BYTES_PER_BLOCK); > + > + cs = i915_flush_dw(cs, MI_FLUSH_DW_LLC | MI_FLUSH_DW_CCS); > + cs = _i915_ctrl_surf_copy_blt(cs, src_offset, dst_offset, > + src_access, dst_access, > + mocs, mocs, num_ccs_blks); > + cs = i915_flush_dw(cs, MI_FLUSH_DW_LLC | MI_FLUSH_DW_CCS); > + if (ccs_ring_size & 1) > + *cs++ = MI_NOOP; > + > + intel_ring_advance(rq, cs); > + > + return 0; > +} This would be an unused function if selftests are not configured, right? > + > static int emit_copy(struct i915_request *rq, > u32 dst_offset, u32 src_offset, int size) > { > diff --git a/drivers/gpu/drm/i915/gt/selftest_migrate.c > b/drivers/gpu/drm/i915/gt/selftest_migrate.c > index b5da8b8cd039..e32cc994f4a2 100644 > --- a/drivers/gpu/drm/i915/gt/selftest_migrate.c > +++ b/drivers/gpu/drm/i915/gt/selftest_migrate.c > @@ -132,6 +132,126 @@ static int copy(struct intel_migrate *migrate, > return err; > } > > +static int intel_context_copy_ccs(struct intel_context *ce, > + const struct i915_deps *deps, > + struct scatterlist *sg, > + enum i915_cache_level cache_level, > + bool write_to_ccs, > + struct i915_request **out) > +{ > + u8 src_access = write_to_ccs ? DIRECT_ACCESS : > INDIRECT_ACCESS; > + u8 dst_access = write_to_ccs ? INDIRECT_ACCESS : > DIRECT_ACCESS; > + struct sgt_dma it = sg_sgt(sg); > + struct i915_request *rq; > + u32 offset; > + int err; > + > + GEM_BUG_ON(ce->vm != ce->engine->gt->migrate.context->vm); > + *out = NULL; > + > + GEM_BUG_ON(ce->ring->size < SZ_64K); > + > + offset = 0; > + if (HAS_64K_PAGES(ce->engine->i915)) > + offset = CHUNK_SZ; > + offset += (u64)rq->engine->instance << 32; > + > + do { > + int len; > + > + rq = i915_request_create(ce); > + if (IS_ERR(rq)) { > + err = PTR_ERR(rq); > + goto out_ce; > + } > + > + if (deps) { > + err = i915_request_await_deps(rq, deps); > + if (err) > + goto out_rq; > + > + if (rq->engine->emit_init_breadcrumb) { > + err = rq->engine- > >emit_init_breadcrumb(rq); > + if (err) > + goto out_rq; > + } > + > + deps = NULL; > + } > + > + /* The PTE updates + clear must not be interrupted. 
> */ > + err = emit_no_arbitration(rq); > + if (err) > + goto out_rq; > + > + len = emit_pte(rq, &it, cache_level, true, offset, > CHUNK_SZ); > + if (len <= 0) { > + err = len; > + goto out_rq; > + } > + > + err = rq->engine->emit_flush(rq, EMIT_INVALIDATE); > + if (err) > + goto out_rq; > + > + err = emit_copy_ccs(rq, offset, dst_access, > + offset, src_access, len); > + if (err) > + goto out_rq; > + > + err = rq->engine->emit_flush(rq, EMIT_INVALIDATE | > + MI_FLUSH_DW_CCS); > + > + /* Arbitration is re-enabled between requests. */ > +out_rq: > + if (*out) > + i915_request_put(*out); > + *out = i915_request_get(rq); > + i915_request_add(rq); > + if (err || !it.sg || !sg_dma_len(it.sg)) > + break; > + > + cond_resched(); > + } while (1); > + > +out_ce: > + return err; > +} > + > +static int > +intel_migrate_ccs_copy(struct intel_migrate *m, > + struct i915_gem_ww_ctx *ww, > + const struct i915_deps *deps, > + struct scatterlist *sg, > + enum i915_cache_level cache_level, > + bool write_to_ccs, > + struct i915_request **out) > +{ > + struct intel_context *ce; > + int err; > + > + *out = NULL; > + if (!m->context) > + return -ENODEV; > + > + ce = intel_migrate_create_context(m); > + if (IS_ERR(ce)) > + ce = intel_context_get(m->context); > + GEM_BUG_ON(IS_ERR(ce)); > + > + err = intel_context_pin_ww(ce, ww); > + if (err) > + goto out; > + > + err = intel_context_copy_ccs(ce, deps, sg, cache_level, > + write_to_ccs, out); > + > + intel_context_unpin(ce); > +out: > + intel_context_put(ce); > + return err; > +} > + > static int clear(struct intel_migrate *migrate, > int (*fn)(struct intel_migrate *migrate, > struct i915_gem_ww_ctx *ww, > @@ -144,7 +264,8 @@ static int clear(struct intel_migrate *migrate, > struct drm_i915_gem_object *obj; > struct i915_request *rq; > struct i915_gem_ww_ctx ww; > - u32 *vaddr; > + u32 *vaddr, val = 0; > + bool ccs_cap = false; > int err = 0; > int i; > > @@ -155,7 +276,12 @@ static int clear(struct intel_migrate *migrate, > /* Consider the rounded up memory too */ > sz = obj->base.size; > > + if (HAS_FLAT_CCS(i915) && i915_gem_object_is_lmem(obj)) > + ccs_cap = true; > + > for_i915_gem_ww(&ww, err, true) { > + int ccs_bytes; > + > err = i915_gem_object_lock(obj, &ww); > if (err) > continue; > @@ -170,44 +296,136 @@ static int clear(struct intel_migrate > *migrate, > vaddr[i] = ~i; > i915_gem_object_flush_map(obj); > > - err = fn(migrate, &ww, obj, sz, &rq); > - if (!err) > - continue; > + if (ccs_cap && !val) { > + /* Write the obj data into ccs surface */ > + err = intel_migrate_ccs_copy(migrate, &ww, > NULL, > + obj->mm.pages- > >sgl, > + obj- > >cache_level, > + true, &rq); > + if (rq && !err) { > + if (i915_request_wait(rq, 0, HZ) < 0) > { > + pr_err("%ps timed out, size: > %u\n", > + fn, sz); > + err = -ETIME; > + } > + i915_request_put(rq); > + rq = NULL; > + } > + if (err) > + continue; > + > + for (i = 0; i < sz / sizeof(u32); i++) > + vaddr[i] = 0x5a5a5a5a; > + i915_gem_object_flush_map(obj); > + > + err = intel_migrate_ccs_copy(migrate, &ww, > NULL, obj->mm.pages->sgl, > + obj- > >cache_level, false, &rq); Why do we read back CCS content here? 
> + if (rq && !err) { > + if (i915_request_wait(rq, 0, HZ) < 0) > { > + pr_err("%ps timed out, size: > %u\n", > + fn, sz); > + err = -ETIME; > + } > + i915_request_put(rq); > + rq = NULL; > + } > + if (err) > + continue; > + > + i915_gem_object_flush_map(obj); > + for (i = 0; !err && i < ccs_bytes; i += 4) { > + if (vaddr[i] != ~i) { > + pr_err("%ps ccs write and > read failed, offset: %d\n", > + fn, i); > + err = -EINVAL; > + } > + } > + if (err) > + continue; > + > + i915_gem_object_flush_map(obj); > + } > > - if (err != -EDEADLK && err != -EINTR && err != - > ERESTARTSYS) > - pr_err("%ps failed, size: %u\n", fn, sz); > - if (rq) { > - i915_request_wait(rq, 0, HZ); > + err = fn(migrate, &ww, obj, val, &rq); > + if (rq && !err) { > + if (i915_request_wait(rq, 0, HZ) < 0) { > + pr_err("%ps timed out, size: %u\n", > fn, sz); > + err = -ETIME; > + } > i915_request_put(rq); > + rq = NULL; > } > - i915_gem_object_unpin_map(obj); > - } > - if (err) > - goto err_out; > + if (err) > + continue; > > - if (rq) { > - if (i915_request_wait(rq, 0, HZ) < 0) { > - pr_err("%ps timed out, size: %u\n", fn, sz); > - err = -ETIME; > + i915_gem_object_flush_map(obj); > + > + /* Verify the set/clear of the obj mem */ > + for (i = 0; !err && i < sz / PAGE_SIZE; i++) { > + int x = i * 1024 + > + i915_prandom_u32_max_state(1024, > prng); > + > + if (vaddr[x] != val) { > + pr_err("%ps failed, (%u != %u), > offset: %zu\n", > + fn, vaddr[x], val, x * > sizeof(u32)); > + igt_hexdump(vaddr + i * 1024, 4096); > + err = -EINVAL; > + } > } > - i915_request_put(rq); > - } > + if (err) > + continue; > > - for (i = 0; !err && i < sz / PAGE_SIZE; i++) { > - int x = i * 1024 + i915_prandom_u32_max_state(1024, > prng); > + if (ccs_cap && !val) { > + for (i = 0; i < sz / sizeof(u32); i++) > + vaddr[i] = ~i; > + i915_gem_object_flush_map(obj); > + > + err = intel_migrate_ccs_copy(migrate, &ww, > NULL, > + obj->mm.pages- > >sgl, > + obj- > >cache_level, > + false, &rq); > + if (rq && !err) { > + if (i915_request_wait(rq, 0, HZ) < 0) > { > + pr_err("%ps timed out, size: > %u\n", > + fn, sz); > + err = -ETIME; > + } > + i915_request_put(rq); > + rq = NULL; > + } > + if (err) > + continue; > + > + ccs_bytes = GET_CCS_BYTES(i915, sz); > + i915_gem_object_flush_map(obj); > + for (i = 0; !err && i < ccs_bytes / > sizeof(u32); i++) { > + if (vaddr[i]) { I think this is incorrect. This assumes that CCS data is read back contiguous for the whole buffer, but instead CCS data is read back per 8MiB chunk and placed at the beginning of each chunk? /Thomas > + pr_err("%ps ccs clearing > failed, offset: %d/%lu\n", > + fn, i, (ccs_bytes / > sizeof(u32)) - 1); > + igt_hexdump(vaddr + i, > ccs_bytes - i * sizeof(u32)); > + err = -EINVAL; > + } > + } > + if (err) > + continue; > + } > + i915_gem_object_unpin_map(obj); > + } > > - if (vaddr[x] != sz) { > - pr_err("%ps failed, size: %u, offset: %zu\n", > - fn, sz, x * sizeof(u32)); > - igt_hexdump(vaddr + i * 1024, 4096); > - err = -EINVAL; > + if (err) { > + if (err != -EDEADLK && err != -EINTR && err != - > ERESTARTSYS) > + pr_err("%ps failed, size: %u\n", fn, sz); > + if (rq && err != -EINVAL) { > + i915_request_wait(rq, 0, HZ); > + i915_request_put(rq); > } > + > + i915_gem_object_unpin_map(obj); > + } else { > + pr_debug("%ps Passed. 
size: %u\n", fn, sz); > } > > - i915_gem_object_unpin_map(obj); > -err_out: > i915_gem_object_put(obj); > - > return err; > } > ---------------------------------------------------------------------- Intel Sweden AB Registered Office: Isafjordsgatan 30B, 164 40 Kista, Stockholm, Sweden Registration Number: 556189-6027 This e-mail and any attachments may contain confidential material for the sole use of the intended recipient(s). Any review or distribution by others is strictly prohibited. If you are not the intended recipient, please contact the sender and delete all copies.
On 2022-03-21 at 16:09:08 +0530, Hellstrom, Thomas wrote: > On Sun, 2022-03-20 at 02:12 +0530, Ramalingam C wrote: > > While clearing the Flat-CCS capable lmem object, we need to clear the > > CCS > > meta data corresponding to the memory. > > > > As part of live_migrate_clear add check for the ccs meta data clear > > for > > the Flat-CCS capable lmem object. > > > > Signed-off-by: Ramalingam C <ramalingam.c@intel.com> > > --- > > drivers/gpu/drm/i915/gt/intel_migrate.c | 32 +++ > > drivers/gpu/drm/i915/gt/selftest_migrate.c | 274 ++++++++++++++++++- > > -- > > 2 files changed, 278 insertions(+), 28 deletions(-) > > > > diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c > > b/drivers/gpu/drm/i915/gt/intel_migrate.c > > index c1db8daf994a..bbfea570c239 100644 > > --- a/drivers/gpu/drm/i915/gt/intel_migrate.c > > +++ b/drivers/gpu/drm/i915/gt/intel_migrate.c > > @@ -572,6 +572,38 @@ static u32 *_i915_ctrl_surf_copy_blt(u32 *cmd, > > u64 src_addr, u64 dst_addr, > > return cmd; > > } > > > > +static int emit_copy_ccs(struct i915_request *rq, > > + u32 dst_offset, u8 dst_access, > > + u32 src_offset, u8 src_access, int size) > > +{ > > + struct drm_i915_private *i915 = rq->engine->i915; > > + int mocs = rq->engine->gt->mocs.uc_index << 1; > > + u32 num_ccs_blks, ccs_ring_size; > > + u32 *cs; > > + > > + ccs_ring_size = calc_ctrl_surf_instr_size(i915, size); > > + WARN_ON(!ccs_ring_size); > > + > > + cs = intel_ring_begin(rq, round_up(ccs_ring_size, 2)); > > + if (IS_ERR(cs)) > > + return PTR_ERR(cs); > > + > > + num_ccs_blks = DIV_ROUND_UP(GET_CCS_BYTES(i915, size), > > + NUM_CCS_BYTES_PER_BLOCK); > > + > > + cs = i915_flush_dw(cs, MI_FLUSH_DW_LLC | MI_FLUSH_DW_CCS); > > + cs = _i915_ctrl_surf_copy_blt(cs, src_offset, dst_offset, > > + src_access, dst_access, > > + mocs, mocs, num_ccs_blks); > > + cs = i915_flush_dw(cs, MI_FLUSH_DW_LLC | MI_FLUSH_DW_CCS); > > + if (ccs_ring_size & 1) > > + *cs++ = MI_NOOP; > > + > > + intel_ring_advance(rq, cs); > > + > > + return 0; > > +} > > > This would be an unused function if selftests are not configured, > right? No Thomas. This is reused between selftest and eviction flow. in next version i am reusing it for evict_clear too. > > > > + > > static int emit_copy(struct i915_request *rq, > > u32 dst_offset, u32 src_offset, int size) > > { > > diff --git a/drivers/gpu/drm/i915/gt/selftest_migrate.c > > b/drivers/gpu/drm/i915/gt/selftest_migrate.c > > index b5da8b8cd039..e32cc994f4a2 100644 > > --- a/drivers/gpu/drm/i915/gt/selftest_migrate.c > > +++ b/drivers/gpu/drm/i915/gt/selftest_migrate.c > > @@ -132,6 +132,126 @@ static int copy(struct intel_migrate *migrate, > > return err; > > } > > > > +static int intel_context_copy_ccs(struct intel_context *ce, > > + const struct i915_deps *deps, > > + struct scatterlist *sg, > > + enum i915_cache_level cache_level, > > + bool write_to_ccs, > > + struct i915_request **out) > > +{ > > + u8 src_access = write_to_ccs ? DIRECT_ACCESS : > > INDIRECT_ACCESS; > > + u8 dst_access = write_to_ccs ? 
INDIRECT_ACCESS : > > DIRECT_ACCESS; > > + struct sgt_dma it = sg_sgt(sg); > > + struct i915_request *rq; > > + u32 offset; > > + int err; > > + > > + GEM_BUG_ON(ce->vm != ce->engine->gt->migrate.context->vm); > > + *out = NULL; > > + > > + GEM_BUG_ON(ce->ring->size < SZ_64K); > > + > > + offset = 0; > > + if (HAS_64K_PAGES(ce->engine->i915)) > > + offset = CHUNK_SZ; > > + offset += (u64)rq->engine->instance << 32; > > + > > + do { > > + int len; > > + > > + rq = i915_request_create(ce); > > + if (IS_ERR(rq)) { > > + err = PTR_ERR(rq); > > + goto out_ce; > > + } > > + > > + if (deps) { > > + err = i915_request_await_deps(rq, deps); > > + if (err) > > + goto out_rq; > > + > > + if (rq->engine->emit_init_breadcrumb) { > > + err = rq->engine- > > >emit_init_breadcrumb(rq); > > + if (err) > > + goto out_rq; > > + } > > + > > + deps = NULL; > > + } > > + > > + /* The PTE updates + clear must not be interrupted. > > */ > > + err = emit_no_arbitration(rq); > > + if (err) > > + goto out_rq; > > + > > + len = emit_pte(rq, &it, cache_level, true, offset, > > CHUNK_SZ); > > + if (len <= 0) { > > + err = len; > > + goto out_rq; > > + } > > + > > + err = rq->engine->emit_flush(rq, EMIT_INVALIDATE); > > + if (err) > > + goto out_rq; > > + > > + err = emit_copy_ccs(rq, offset, dst_access, > > + offset, src_access, len); > > + if (err) > > + goto out_rq; > > + > > + err = rq->engine->emit_flush(rq, EMIT_INVALIDATE | > > + MI_FLUSH_DW_CCS); > > + > > + /* Arbitration is re-enabled between requests. */ > > +out_rq: > > + if (*out) > > + i915_request_put(*out); > > + *out = i915_request_get(rq); > > + i915_request_add(rq); > > + if (err || !it.sg || !sg_dma_len(it.sg)) > > + break; > > + > > + cond_resched(); > > + } while (1); > > + > > +out_ce: > > + return err; > > +} > > + > > +static int > > +intel_migrate_ccs_copy(struct intel_migrate *m, > > + struct i915_gem_ww_ctx *ww, > > + const struct i915_deps *deps, > > + struct scatterlist *sg, > > + enum i915_cache_level cache_level, > > + bool write_to_ccs, > > + struct i915_request **out) > > +{ > > + struct intel_context *ce; > > + int err; > > + > > + *out = NULL; > > + if (!m->context) > > + return -ENODEV; > > + > > + ce = intel_migrate_create_context(m); > > + if (IS_ERR(ce)) > > + ce = intel_context_get(m->context); > > + GEM_BUG_ON(IS_ERR(ce)); > > + > > + err = intel_context_pin_ww(ce, ww); > > + if (err) > > + goto out; > > + > > + err = intel_context_copy_ccs(ce, deps, sg, cache_level, > > + write_to_ccs, out); > > + > > + intel_context_unpin(ce); > > +out: > > + intel_context_put(ce); > > + return err; > > +} > > + > > static int clear(struct intel_migrate *migrate, > > int (*fn)(struct intel_migrate *migrate, > > struct i915_gem_ww_ctx *ww, > > @@ -144,7 +264,8 @@ static int clear(struct intel_migrate *migrate, > > struct drm_i915_gem_object *obj; > > struct i915_request *rq; > > struct i915_gem_ww_ctx ww; > > - u32 *vaddr; > > + u32 *vaddr, val = 0; > > + bool ccs_cap = false; > > int err = 0; > > int i; > > > > @@ -155,7 +276,12 @@ static int clear(struct intel_migrate *migrate, > > /* Consider the rounded up memory too */ > > sz = obj->base.size; > > > > + if (HAS_FLAT_CCS(i915) && i915_gem_object_is_lmem(obj)) > > + ccs_cap = true; > > + > > for_i915_gem_ww(&ww, err, true) { > > + int ccs_bytes; > > + > > err = i915_gem_object_lock(obj, &ww); > > if (err) > > continue; > > @@ -170,44 +296,136 @@ static int clear(struct intel_migrate > > *migrate, > > vaddr[i] = ~i; > > i915_gem_object_flush_map(obj); > > > > - err = fn(migrate, &ww, obj, sz, 
&rq); > > - if (!err) > > - continue; > > + if (ccs_cap && !val) { > > + /* Write the obj data into ccs surface */ > > + err = intel_migrate_ccs_copy(migrate, &ww, > > NULL, > > + obj->mm.pages- > > >sgl, > > + obj- > > >cache_level, > > + true, &rq); > > + if (rq && !err) { > > + if (i915_request_wait(rq, 0, HZ) < 0) > > { > > + pr_err("%ps timed out, size: > > %u\n", > > + fn, sz); > > + err = -ETIME; > > + } > > + i915_request_put(rq); > > + rq = NULL; > > + } > > + if (err) > > + continue; > > + > > + for (i = 0; i < sz / sizeof(u32); i++) > > + vaddr[i] = 0x5a5a5a5a; > > + i915_gem_object_flush_map(obj); > > + > > + err = intel_migrate_ccs_copy(migrate, &ww, > > NULL, obj->mm.pages->sgl, > > + obj- > > >cache_level, false, &rq); > > Why do we read back CCS content here? Was rechecking the ccs copy. but this is not needed for real intention. Removing in next version. > > > + if (rq && !err) { > > + if (i915_request_wait(rq, 0, HZ) < 0) > > { > > + pr_err("%ps timed out, size: > > %u\n", > > + fn, sz); > > + err = -ETIME; > > + } > > + i915_request_put(rq); > > + rq = NULL; > > + } > > + if (err) > > + continue; > > + > > + i915_gem_object_flush_map(obj); > > + for (i = 0; !err && i < ccs_bytes; i += 4) { > > + if (vaddr[i] != ~i) { > > + pr_err("%ps ccs write and > > read failed, offset: %d\n", > > + fn, i); > > + err = -EINVAL; > > + } > > + } > > + if (err) > > + continue; > > + > > + i915_gem_object_flush_map(obj); > > + } > > > > - if (err != -EDEADLK && err != -EINTR && err != - > > ERESTARTSYS) > > - pr_err("%ps failed, size: %u\n", fn, sz); > > - if (rq) { > > - i915_request_wait(rq, 0, HZ); > > + err = fn(migrate, &ww, obj, val, &rq); > > + if (rq && !err) { > > + if (i915_request_wait(rq, 0, HZ) < 0) { > > + pr_err("%ps timed out, size: %u\n", > > fn, sz); > > + err = -ETIME; > > + } > > i915_request_put(rq); > > + rq = NULL; > > } > > - i915_gem_object_unpin_map(obj); > > - } > > - if (err) > > - goto err_out; > > + if (err) > > + continue; > > > > - if (rq) { > > - if (i915_request_wait(rq, 0, HZ) < 0) { > > - pr_err("%ps timed out, size: %u\n", fn, sz); > > - err = -ETIME; > > + i915_gem_object_flush_map(obj); > > + > > + /* Verify the set/clear of the obj mem */ > > + for (i = 0; !err && i < sz / PAGE_SIZE; i++) { > > + int x = i * 1024 + > > + i915_prandom_u32_max_state(1024, > > prng); > > + > > + if (vaddr[x] != val) { > > + pr_err("%ps failed, (%u != %u), > > offset: %zu\n", > > + fn, vaddr[x], val, x * > > sizeof(u32)); > > + igt_hexdump(vaddr + i * 1024, 4096); > > + err = -EINVAL; > > + } > > } > > - i915_request_put(rq); > > - } > > + if (err) > > + continue; > > > > - for (i = 0; !err && i < sz / PAGE_SIZE; i++) { > > - int x = i * 1024 + i915_prandom_u32_max_state(1024, > > prng); > > + if (ccs_cap && !val) { > > + for (i = 0; i < sz / sizeof(u32); i++) > > + vaddr[i] = ~i; > > + i915_gem_object_flush_map(obj); > > + > > + err = intel_migrate_ccs_copy(migrate, &ww, > > NULL, > > + obj->mm.pages- > > >sgl, > > + obj- > > >cache_level, > > + false, &rq); > > + if (rq && !err) { > > + if (i915_request_wait(rq, 0, HZ) < 0) > > { > > + pr_err("%ps timed out, size: > > %u\n", > > + fn, sz); > > + err = -ETIME; > > + } > > + i915_request_put(rq); > > + rq = NULL; > > + } > > + if (err) > > + continue; > > + > > + ccs_bytes = GET_CCS_BYTES(i915, sz); > > + i915_gem_object_flush_map(obj); > > + for (i = 0; !err && i < ccs_bytes / > > sizeof(u32); i++) { > > + if (vaddr[i]) { > > I think this is incorrect. 
This assumes that CCS data is read back > contiguous for the whole buffer, but instead CCS data is read back > per 8MiB chunk and placed at the beginning of each chunk? Yes. This is the source for the problem I was discussing with you. Fixed it in the next version. Please share your feedback. could have used a dedicated obj for ccs, but just calculated the offset of the ccs bytes. Ram > > /Thomas > > > > > + pr_err("%ps ccs clearing > > failed, offset: %d/%lu\n", > > + fn, i, (ccs_bytes / > > sizeof(u32)) - 1); > > + igt_hexdump(vaddr + i, > > ccs_bytes - i * sizeof(u32)); > > + err = -EINVAL; > > + } > > + } > > + if (err) > > + continue; > > + } > > + i915_gem_object_unpin_map(obj); > > + } > > > > - if (vaddr[x] != sz) { > > - pr_err("%ps failed, size: %u, offset: %zu\n", > > - fn, sz, x * sizeof(u32)); > > - igt_hexdump(vaddr + i * 1024, 4096); > > - err = -EINVAL; > > + if (err) { > > + if (err != -EDEADLK && err != -EINTR && err != - > > ERESTARTSYS) > > + pr_err("%ps failed, size: %u\n", fn, sz); > > + if (rq && err != -EINVAL) { > > + i915_request_wait(rq, 0, HZ); > > + i915_request_put(rq); > > } > > + > > + i915_gem_object_unpin_map(obj); > > + } else { > > + pr_debug("%ps Passed. size: %u\n", fn, sz); > > } > > > > - i915_gem_object_unpin_map(obj); > > -err_out: > > i915_gem_object_put(obj); > > - > > return err; > > } > > >
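[Editorial note: to illustrate the per-chunk layout discussed above, here is a rough sketch of a chunk-aware readback check. The helper name, its parameters, and the assumption that each CHUNK_SZ-sized chunk carries its CCS bytes at its own start are illustrative only and are not taken from the posted v2 fix.]

/*
 * Illustrative sketch, not the actual follow-up change: walk the mapping
 * chunk by chunk and check only the CCS bytes expected at the start of
 * each chunk, rather than treating the CCS data as one contiguous run at
 * the start of the object.
 */
static int check_ccs_cleared_per_chunk(u32 *vaddr, u32 obj_size,
				       u32 chunk_sz, u32 ccs_bytes_per_chunk)
{
	u32 chunk, i;

	for (chunk = 0; chunk * chunk_sz < obj_size; chunk++) {
		u32 *ccs = vaddr + (chunk * chunk_sz) / sizeof(u32);

		for (i = 0; i < ccs_bytes_per_chunk / sizeof(u32); i++) {
			if (ccs[i]) {
				pr_err("ccs clearing failed: chunk %u, dword %u\n",
				       chunk, i);
				return -EINVAL;
			}
		}
	}

	return 0;
}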
diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c b/drivers/gpu/drm/i915/gt/intel_migrate.c index c1db8daf994a..bbfea570c239 100644 --- a/drivers/gpu/drm/i915/gt/intel_migrate.c +++ b/drivers/gpu/drm/i915/gt/intel_migrate.c @@ -572,6 +572,38 @@ static u32 *_i915_ctrl_surf_copy_blt(u32 *cmd, u64 src_addr, u64 dst_addr, return cmd; } +static int emit_copy_ccs(struct i915_request *rq, + u32 dst_offset, u8 dst_access, + u32 src_offset, u8 src_access, int size) +{ + struct drm_i915_private *i915 = rq->engine->i915; + int mocs = rq->engine->gt->mocs.uc_index << 1; + u32 num_ccs_blks, ccs_ring_size; + u32 *cs; + + ccs_ring_size = calc_ctrl_surf_instr_size(i915, size); + WARN_ON(!ccs_ring_size); + + cs = intel_ring_begin(rq, round_up(ccs_ring_size, 2)); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + num_ccs_blks = DIV_ROUND_UP(GET_CCS_BYTES(i915, size), + NUM_CCS_BYTES_PER_BLOCK); + + cs = i915_flush_dw(cs, MI_FLUSH_DW_LLC | MI_FLUSH_DW_CCS); + cs = _i915_ctrl_surf_copy_blt(cs, src_offset, dst_offset, + src_access, dst_access, + mocs, mocs, num_ccs_blks); + cs = i915_flush_dw(cs, MI_FLUSH_DW_LLC | MI_FLUSH_DW_CCS); + if (ccs_ring_size & 1) + *cs++ = MI_NOOP; + + intel_ring_advance(rq, cs); + + return 0; +} + static int emit_copy(struct i915_request *rq, u32 dst_offset, u32 src_offset, int size) { diff --git a/drivers/gpu/drm/i915/gt/selftest_migrate.c b/drivers/gpu/drm/i915/gt/selftest_migrate.c index b5da8b8cd039..e32cc994f4a2 100644 --- a/drivers/gpu/drm/i915/gt/selftest_migrate.c +++ b/drivers/gpu/drm/i915/gt/selftest_migrate.c @@ -132,6 +132,126 @@ static int copy(struct intel_migrate *migrate, return err; } +static int intel_context_copy_ccs(struct intel_context *ce, + const struct i915_deps *deps, + struct scatterlist *sg, + enum i915_cache_level cache_level, + bool write_to_ccs, + struct i915_request **out) +{ + u8 src_access = write_to_ccs ? DIRECT_ACCESS : INDIRECT_ACCESS; + u8 dst_access = write_to_ccs ? INDIRECT_ACCESS : DIRECT_ACCESS; + struct sgt_dma it = sg_sgt(sg); + struct i915_request *rq; + u32 offset; + int err; + + GEM_BUG_ON(ce->vm != ce->engine->gt->migrate.context->vm); + *out = NULL; + + GEM_BUG_ON(ce->ring->size < SZ_64K); + + offset = 0; + if (HAS_64K_PAGES(ce->engine->i915)) + offset = CHUNK_SZ; + offset += (u64)rq->engine->instance << 32; + + do { + int len; + + rq = i915_request_create(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out_ce; + } + + if (deps) { + err = i915_request_await_deps(rq, deps); + if (err) + goto out_rq; + + if (rq->engine->emit_init_breadcrumb) { + err = rq->engine->emit_init_breadcrumb(rq); + if (err) + goto out_rq; + } + + deps = NULL; + } + + /* The PTE updates + clear must not be interrupted. */ + err = emit_no_arbitration(rq); + if (err) + goto out_rq; + + len = emit_pte(rq, &it, cache_level, true, offset, CHUNK_SZ); + if (len <= 0) { + err = len; + goto out_rq; + } + + err = rq->engine->emit_flush(rq, EMIT_INVALIDATE); + if (err) + goto out_rq; + + err = emit_copy_ccs(rq, offset, dst_access, + offset, src_access, len); + if (err) + goto out_rq; + + err = rq->engine->emit_flush(rq, EMIT_INVALIDATE | + MI_FLUSH_DW_CCS); + + /* Arbitration is re-enabled between requests. 
*/ +out_rq: + if (*out) + i915_request_put(*out); + *out = i915_request_get(rq); + i915_request_add(rq); + if (err || !it.sg || !sg_dma_len(it.sg)) + break; + + cond_resched(); + } while (1); + +out_ce: + return err; +} + +static int +intel_migrate_ccs_copy(struct intel_migrate *m, + struct i915_gem_ww_ctx *ww, + const struct i915_deps *deps, + struct scatterlist *sg, + enum i915_cache_level cache_level, + bool write_to_ccs, + struct i915_request **out) +{ + struct intel_context *ce; + int err; + + *out = NULL; + if (!m->context) + return -ENODEV; + + ce = intel_migrate_create_context(m); + if (IS_ERR(ce)) + ce = intel_context_get(m->context); + GEM_BUG_ON(IS_ERR(ce)); + + err = intel_context_pin_ww(ce, ww); + if (err) + goto out; + + err = intel_context_copy_ccs(ce, deps, sg, cache_level, + write_to_ccs, out); + + intel_context_unpin(ce); +out: + intel_context_put(ce); + return err; +} + static int clear(struct intel_migrate *migrate, int (*fn)(struct intel_migrate *migrate, struct i915_gem_ww_ctx *ww, @@ -144,7 +264,8 @@ static int clear(struct intel_migrate *migrate, struct drm_i915_gem_object *obj; struct i915_request *rq; struct i915_gem_ww_ctx ww; - u32 *vaddr; + u32 *vaddr, val = 0; + bool ccs_cap = false; int err = 0; int i; @@ -155,7 +276,12 @@ static int clear(struct intel_migrate *migrate, /* Consider the rounded up memory too */ sz = obj->base.size; + if (HAS_FLAT_CCS(i915) && i915_gem_object_is_lmem(obj)) + ccs_cap = true; + for_i915_gem_ww(&ww, err, true) { + int ccs_bytes; + err = i915_gem_object_lock(obj, &ww); if (err) continue; @@ -170,44 +296,136 @@ static int clear(struct intel_migrate *migrate, vaddr[i] = ~i; i915_gem_object_flush_map(obj); - err = fn(migrate, &ww, obj, sz, &rq); - if (!err) - continue; + if (ccs_cap && !val) { + /* Write the obj data into ccs surface */ + err = intel_migrate_ccs_copy(migrate, &ww, NULL, + obj->mm.pages->sgl, + obj->cache_level, + true, &rq); + if (rq && !err) { + if (i915_request_wait(rq, 0, HZ) < 0) { + pr_err("%ps timed out, size: %u\n", + fn, sz); + err = -ETIME; + } + i915_request_put(rq); + rq = NULL; + } + if (err) + continue; + + for (i = 0; i < sz / sizeof(u32); i++) + vaddr[i] = 0x5a5a5a5a; + i915_gem_object_flush_map(obj); + + err = intel_migrate_ccs_copy(migrate, &ww, NULL, obj->mm.pages->sgl, + obj->cache_level, false, &rq); + if (rq && !err) { + if (i915_request_wait(rq, 0, HZ) < 0) { + pr_err("%ps timed out, size: %u\n", + fn, sz); + err = -ETIME; + } + i915_request_put(rq); + rq = NULL; + } + if (err) + continue; + + i915_gem_object_flush_map(obj); + for (i = 0; !err && i < ccs_bytes; i += 4) { + if (vaddr[i] != ~i) { + pr_err("%ps ccs write and read failed, offset: %d\n", + fn, i); + err = -EINVAL; + } + } + if (err) + continue; + + i915_gem_object_flush_map(obj); + } - if (err != -EDEADLK && err != -EINTR && err != -ERESTARTSYS) - pr_err("%ps failed, size: %u\n", fn, sz); - if (rq) { - i915_request_wait(rq, 0, HZ); + err = fn(migrate, &ww, obj, val, &rq); + if (rq && !err) { + if (i915_request_wait(rq, 0, HZ) < 0) { + pr_err("%ps timed out, size: %u\n", fn, sz); + err = -ETIME; + } i915_request_put(rq); + rq = NULL; } - i915_gem_object_unpin_map(obj); - } - if (err) - goto err_out; + if (err) + continue; - if (rq) { - if (i915_request_wait(rq, 0, HZ) < 0) { - pr_err("%ps timed out, size: %u\n", fn, sz); - err = -ETIME; + i915_gem_object_flush_map(obj); + + /* Verify the set/clear of the obj mem */ + for (i = 0; !err && i < sz / PAGE_SIZE; i++) { + int x = i * 1024 + + i915_prandom_u32_max_state(1024, prng); + + if 
(vaddr[x] != val) { + pr_err("%ps failed, (%u != %u), offset: %zu\n", + fn, vaddr[x], val, x * sizeof(u32)); + igt_hexdump(vaddr + i * 1024, 4096); + err = -EINVAL; + } } - i915_request_put(rq); - } + if (err) + continue; - for (i = 0; !err && i < sz / PAGE_SIZE; i++) { - int x = i * 1024 + i915_prandom_u32_max_state(1024, prng); + if (ccs_cap && !val) { + for (i = 0; i < sz / sizeof(u32); i++) + vaddr[i] = ~i; + i915_gem_object_flush_map(obj); + + err = intel_migrate_ccs_copy(migrate, &ww, NULL, + obj->mm.pages->sgl, + obj->cache_level, + false, &rq); + if (rq && !err) { + if (i915_request_wait(rq, 0, HZ) < 0) { + pr_err("%ps timed out, size: %u\n", + fn, sz); + err = -ETIME; + } + i915_request_put(rq); + rq = NULL; + } + if (err) + continue; + + ccs_bytes = GET_CCS_BYTES(i915, sz); + i915_gem_object_flush_map(obj); + for (i = 0; !err && i < ccs_bytes / sizeof(u32); i++) { + if (vaddr[i]) { + pr_err("%ps ccs clearing failed, offset: %d/%lu\n", + fn, i, (ccs_bytes / sizeof(u32)) - 1); + igt_hexdump(vaddr + i, ccs_bytes - i * sizeof(u32)); + err = -EINVAL; + } + } + if (err) + continue; + } + i915_gem_object_unpin_map(obj); + } - if (vaddr[x] != sz) { - pr_err("%ps failed, size: %u, offset: %zu\n", - fn, sz, x * sizeof(u32)); - igt_hexdump(vaddr + i * 1024, 4096); - err = -EINVAL; + if (err) { + if (err != -EDEADLK && err != -EINTR && err != -ERESTARTSYS) + pr_err("%ps failed, size: %u\n", fn, sz); + if (rq && err != -EINVAL) { + i915_request_wait(rq, 0, HZ); + i915_request_put(rq); } + + i915_gem_object_unpin_map(obj); + } else { + pr_debug("%ps Passed. size: %u\n", fn, sz); } - i915_gem_object_unpin_map(obj); -err_out: i915_gem_object_put(obj); - return err; }
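[Editorial note: for a rough sense of the metadata sizes the selftest ends up checking, assuming a flat-CCS packing of one CCS byte per 256 bytes of main surface — consistent with how GET_CCS_BYTES()/NUM_CCS_BYTES_PER_BLOCK are used above, but not spelled out in this patch.]

/* Back-of-the-envelope only; the 1:256 packing ratio is an assumption. */
#define ASSUMED_BYTES_PER_CCS_BYTE	256

static inline u32 example_ccs_bytes(u32 main_surface_bytes)
{
	/* e.g. an 8 MiB chunk would carry 8 MiB / 256 = 32 KiB of CCS data */
	return DIV_ROUND_UP(main_surface_bytes, ASSUMED_BYTES_PER_CCS_BYTE);
}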
While clearing the Flat-CCS capable lmem object, we need to clear the CCS
meta data corresponding to the memory.

As part of live_migrate_clear add check for the ccs meta data clear for
the Flat-CCS capable lmem object.

Signed-off-by: Ramalingam C <ramalingam.c@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_migrate.c    |  32 +++
 drivers/gpu/drm/i915/gt/selftest_migrate.c | 274 ++++++++++++++++++---
 2 files changed, 278 insertions(+), 28 deletions(-)