Message ID | 20230130101436.924402-1-andrzej.hajda@intel.com (mailing list archive)
---|---
State | New, archived
Series | [v5] drm/i915/gt: Add selftests for TLB invalidation
On 30/01/2023 10:14, Andrzej Hajda wrote:
> From: Chris Wilson <chris@chris-wilson.co.uk>
>
> Check that we invalidate the TLB cache, the updated physical addresses
> are immediately visible to the HW, and there is no retention of the old
> physical address for concurrent HW access.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> [ahajda: adjust to upstream driver, v2+]
> Signed-off-by: Andrzej Hajda <andrzej.hajda@intel.com>
> ---
> v2:
>   - addressed comments (Tvrtko),
>   - changed pin/sample address calculation,
>   - removed checks for platforms older than 8,
>   - use low ints in MI_DO_COMPARE to be more clear,
>   - continue test if physical addresses have the same upper 32 bits,
>   - consolidate two calls to pte_tlbinv into one
> v3:
>   - skip pages not supported by vm (CI reported EINVAL),
>   - fix dw size in MI_CONDITIONAL_BATCH_BUFFER_END for gen8 (CI reported EIO),
>   - remove aggressive allocation to get different upper halves of physical
>     address (CI reported OOM).
> v4:
>   - align address in MI_CONDITIONAL_BATCH_BUFFER_END to 8b,
>   - set QWORD pointed by addr in above cmd, as required by Gen8/VCS.
> v5:
>   - set dw size again to 2 (CI reports EIO due to semaphore sanitycheck).
>
> Sorry for spamming, but apparently CI is the only way to test Gen8.
> ---
>  drivers/gpu/drm/i915/gt/intel_gpu_commands.h |   1 +
>  drivers/gpu/drm/i915/gt/intel_gt.c           |   4 +
>  drivers/gpu/drm/i915/gt/selftest_tlb.c       | 379 ++++++++++++++++++
>  .../drm/i915/selftests/i915_live_selftests.h |   1 +
>  4 files changed, 385 insertions(+)
>  create mode 100644 drivers/gpu/drm/i915/gt/selftest_tlb.c

[snip - full patch quoted; see the diff at the end of this page]

> +	pr_info("%s(%s): Sampling %llx, with alignment %llx, using PTE size %x (phys %x, sg %x), invalidate:%llx+%llx\n",
> +		ce->engine->name, va->obj->mm.region->name ?: "smem",
> +		addr, align, va->resource->page_sizes_gtt, va->page_sizes.phys,
> +		va->page_sizes.sg, addr & -length, length);

Worth skipping this log if va == vb? Or logging something different like
"... Sanity checking ...".

> +	/*
> +	 * Sample the target to see if we spot an incorrect page.

s/incorrect page/updated backing store/? Or something like that.

> +	 * Gen8 VCS compares immediate value with bitwise-and of two
> +	 * consecutive DWORDS pointed by addr, other gen/engines compare value
> +	 * with DWORD pointed by addr. Moreover we want to exercise DWORD size
> +	 * invalidations. To fulfill all these requirements below values
> +	 * has been chosen.

s/has/have/

[snip]

> +out_va:
> +	if (vb != va)
> +		memset(&vb->node, 0, sizeof(vb->node));

This can maybe be a bit fragile. Store a local copy of vb->node and
restore from that?

[snip]

Okay I can follow it and it looks plausible to me. No obvious errors
stick out and if CI says it's solid I'm happy.

Have you tried to do a run or two with TLB invalidation nerfed to see how
the detection rate is looking?

Regards,

Tvrtko
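[Editor's note: as a rough illustration of the review suggestion above to keep a
local copy of vb->node rather than zeroing it afterwards, a minimal sketch could
look like the fragment below. It assumes the surrounding pte_tlbinv() context
from the patch; the local variable name saved_node is hypothetical and this is
not what any posted revision necessarily does.]

	/*
	 * Sketch only: save vb's original drm_mm_node and restore it on the
	 * way out, instead of memset()-ing the structure to zero.
	 */
	struct drm_mm_node saved_node = vb->node;

	vb->node = va->node;	/* overwrites the _same_ PTE, as in the patch */

	/* ... build and run the spinning COND_BBE batch ... */

	if (vb != va)
		vb->node = saved_node;	/* restore instead of memset(&vb->node, 0, ...) */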
On 30.01.2023 13:35, Tvrtko Ursulin wrote:
>
> On 30/01/2023 10:14, Andrzej Hajda wrote:
>> From: Chris Wilson <chris@chris-wilson.co.uk>

[snip - full quote of the patch and review trimmed]

> Okay I can follow it and it looks plausible to me. No obvious errors
> stick out and if CI says it's solid I'm happy.

v6 sent with comments addressed.

> Have you tried to do a run or two with TLB invalidation nerfed to see how
> the detection rate is looking?

Yes, nerfing TLB_INV addresses works for every engine.

Regards
Andrzej

> Regards,
>
> Tvrtko
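[Editor's note: a sketch of what the "nerfed" detection run discussed above might
look like, under the assumption that the tlbinv callback of mem_tlbinv() is simply
replaced with a no-op. tlbinv_nerfed is a hypothetical helper, not part of the
patch; with the real invalidate skipped, every engine should hit the
"Request did not complete" error path.]

	/* Hypothetical no-op invalidation hook, for checking detection only. */
	static void tlbinv_nerfed(struct i915_address_space *vm, u64 addr, u64 length)
	{
		/* Intentionally skip intel_gt_invalidate_tlb() */
	}

	/* e.g. temporarily in invalidate_full(): */
	err = mem_tlbinv(gt, create_smem, tlbinv_nerfed);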
diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h index 2af1ae3831df98..e10507fa71ce63 100644 --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h @@ -394,6 +394,7 @@ #define MI_LOAD_URB_MEM MI_INSTR(0x2C, 0) #define MI_STORE_URB_MEM MI_INSTR(0x2D, 0) #define MI_CONDITIONAL_BATCH_BUFFER_END MI_INSTR(0x36, 0) +#define MI_DO_COMPARE REG_BIT(21) #define STATE_BASE_ADDRESS \ ((0x3 << 29) | (0x0 << 27) | (0x1 << 24) | (0x1 << 16)) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index f0dbfc434e0773..001a7ec5b86182 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -1205,3 +1205,7 @@ void intel_gt_invalidate_tlb(struct intel_gt *gt, u32 seqno) mutex_unlock(>->tlb.invalidate_lock); } } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftest_tlb.c" +#endif diff --git a/drivers/gpu/drm/i915/gt/selftest_tlb.c b/drivers/gpu/drm/i915/gt/selftest_tlb.c new file mode 100644 index 00000000000000..166d18a614d51d --- /dev/null +++ b/drivers/gpu/drm/i915/gt/selftest_tlb.c @@ -0,0 +1,379 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include "i915_selftest.h" + +#include "gem/i915_gem_internal.h" +#include "gem/i915_gem_region.h" + +#include "gen8_engine_cs.h" +#include "i915_gem_ww.h" +#include "intel_engine_regs.h" +#include "intel_gpu_commands.h" +#include "intel_context.h" +#include "intel_gt.h" +#include "intel_ring.h" + +#include "selftests/igt_flush_test.h" +#include "selftests/i915_random.h" + +static void vma_set_qw(struct i915_vma *vma, u64 addr, u64 val) +{ + GEM_BUG_ON(addr < i915_vma_offset(vma)); + GEM_BUG_ON(addr >= i915_vma_offset(vma) + i915_vma_size(vma) + sizeof(val)); + memset64(page_mask_bits(vma->obj->mm.mapping) + + (addr - i915_vma_offset(vma)), val, 1); +} + +static int +pte_tlbinv(struct intel_context *ce, + struct i915_vma *va, + struct i915_vma *vb, + u64 align, + void (*tlbinv)(struct i915_address_space *vm, u64 addr, u64 length), + u64 length, + struct rnd_state *prng) +{ + struct drm_i915_gem_object *batch; + struct i915_request *rq; + struct i915_vma *vma; + u64 addr; + int err; + u32 *cs; + + batch = i915_gem_object_create_internal(ce->vm->i915, 4096); + if (IS_ERR(batch)) + return PTR_ERR(batch); + + vma = i915_vma_instance(batch, ce->vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + goto out; + + /* Pin va at random but aligned offset after vma */ + addr = round_up(vma->node.start + vma->node.size, align); + /* MI_CONDITIONAL_BATCH_BUFFER_END limits address to 48b */ + addr = igt_random_offset(prng, addr, min(ce->vm->total, BIT_ULL(48)), + va->size, align); + err = i915_vma_pin(va, 0, 0, addr | PIN_OFFSET_FIXED | PIN_USER); + if (err) { + pr_err("Cannot pin at %llx+%llx\n", addr, va->size); + goto out; + } + GEM_BUG_ON(i915_vma_offset(va) != addr); + vb->node = va->node; /* overwrites the _same_ PTE */ + + /* + * Now choose random dword at the 1st pinned page. + * + * SZ_64K pages on dg1 require that the whole PT be marked + * containing 64KiB entries. So we make sure that vma + * covers the whole PT, despite being randomly aligned to 64KiB + * and restrict our sampling to the 2MiB PT within where + * we know that we will be using 64KiB pages. 
+ */ + if (align == SZ_64K) + addr = round_up(addr, SZ_2M); + addr = igt_random_offset(prng, addr, addr + align, 8, 8); + + pr_info("%s(%s): Sampling %llx, with alignment %llx, using PTE size %x (phys %x, sg %x), invalidate:%llx+%llx\n", + ce->engine->name, va->obj->mm.region->name ?: "smem", + addr, align, va->resource->page_sizes_gtt, va->page_sizes.phys, + va->page_sizes.sg, addr & -length, length); + + cs = i915_gem_object_pin_map_unlocked(batch, I915_MAP_WC); + *cs++ = MI_NOOP; /* for later termination */ + /* + * Sample the target to see if we spot an incorrect page. + * Gen8 VCS compares immediate value with bitwise-and of two + * consecutive DWORDS pointed by addr, other gen/engines compare value + * with DWORD pointed by addr. Moreover we want to exercise DWORD size + * invalidations. To fulfill all these requirements below values + * has been chosen. + */ + *cs++ = MI_CONDITIONAL_BATCH_BUFFER_END | MI_DO_COMPARE | 2; + *cs++ = 0; /* break if *addr == 0 */ + *cs++ = lower_32_bits(addr); + *cs++ = upper_32_bits(addr); + vma_set_qw(va, addr, -1); + vma_set_qw(vb, addr, 0); + + /* Keep sampling until we get bored */ + *cs++ = MI_BATCH_BUFFER_START | BIT(8) | 1; + *cs++ = lower_32_bits(i915_vma_offset(vma)); + *cs++ = upper_32_bits(i915_vma_offset(vma)); + + i915_gem_object_flush_map(batch); + + rq = i915_request_create(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out_va; + } + + err = rq->engine->emit_bb_start(rq, i915_vma_offset(vma), 0, 0); + if (err) { + i915_request_add(rq); + goto out_va; + } + + i915_request_get(rq); + i915_request_add(rq); + + /* Short sleep to sanitycheck the batch is spinning before we begin */ + msleep(10); + if (va == vb) { + if (!i915_request_completed(rq)) { + pr_err("Semaphore sanitycheck failed\n"); + err = -EIO; + } + } else if (!i915_request_completed(rq)) { + struct i915_vma_resource vb_res = { + .bi.pages = vb->obj->mm.pages, + .bi.page_sizes = vb->obj->mm.page_sizes, + .start = i915_vma_offset(vb), + .vma_size = i915_vma_size(vb) + }; + unsigned int pte_flags = 0; + + /* Flip the PTE between A and B */ + if (i915_gem_object_is_lmem(vb->obj)) + pte_flags |= PTE_LM; + ce->vm->insert_entries(ce->vm, &vb_res, 0, pte_flags); + + /* Flush the PTE update to concurrent HW */ + tlbinv(ce->vm, addr & -length, length); + + if (wait_for(i915_request_completed(rq), HZ / 2)) { + pr_err("%s: Request did not complete; the COND_BBE did not read the updated PTE\n", + ce->engine->name); + err = -EINVAL; + } + } else { + pr_err("Spinner ended unexpectedly\n"); + err = -EIO; + } + i915_request_put(rq); + + cs = page_mask_bits(batch->mm.mapping); + *cs = MI_BATCH_BUFFER_END; + wmb(); + +out_va: + if (vb != va) + memset(&vb->node, 0, sizeof(vb->node)); + i915_vma_unpin(va); + if (i915_vma_unbind_unlocked(va)) + err = -EIO; +out: + i915_gem_object_put(batch); + return err; +} + +static struct drm_i915_gem_object *create_lmem(struct intel_gt *gt) +{ + /* + * Allocation of largest possible page size allows to test all types + * of pages. + */ + return i915_gem_object_create_lmem(gt->i915, SZ_1G, I915_BO_ALLOC_CONTIGUOUS); +} + +static struct drm_i915_gem_object *create_smem(struct intel_gt *gt) +{ + /* + * SZ_64K pages require covering the whole 2M PT (gen8 to tgl/dg1). + * While that does not require the whole 2M block to be contiguous + * it is easier to make it so, since we need that for SZ_2M pagees. + * Since we randomly offset the start of the vma, we need a 4M object + * so that there is a 2M range within it is suitable for SZ_64K PTE. 
+ */ + return i915_gem_object_create_internal(gt->i915, SZ_4M); +} + +static int +mem_tlbinv(struct intel_gt *gt, + struct drm_i915_gem_object *(*create_fn)(struct intel_gt *), + void (*tlbinv)(struct i915_address_space *vm, u64 addr, u64 length)) +{ + unsigned int ppgtt_size = RUNTIME_INFO(gt->i915)->ppgtt_size; + struct intel_engine_cs *engine; + struct drm_i915_gem_object *A, *B; + struct i915_ppgtt *ppgtt; + struct i915_vma *va, *vb; + enum intel_engine_id id; + I915_RND_STATE(prng); + void *vaddr; + int err; + + /* + * Check that the TLB invalidate is able to revoke an active + * page. We load a page into a spinning COND_BBE loop and then + * remap that page to a new physical address. The old address, and + * so the loop keeps spinning, is retained in the TLB cache until + * we issue an invalidate. + */ + + A = create_fn(gt); + if (IS_ERR(A)) + return PTR_ERR(A); + + vaddr = i915_gem_object_pin_map_unlocked(A, I915_MAP_WC); + if (IS_ERR(vaddr)) { + err = PTR_ERR(vaddr); + goto out_a; + } + + B = create_fn(gt); + if (IS_ERR(B)) { + err = PTR_ERR(B); + goto out_a; + } + + vaddr = i915_gem_object_pin_map_unlocked(B, I915_MAP_WC); + if (IS_ERR(vaddr)) { + err = PTR_ERR(vaddr); + goto out_b; + } + + GEM_BUG_ON(A->base.size != B->base.size); + if ((A->mm.page_sizes.phys | B->mm.page_sizes.phys) & (A->base.size - 1)) + pr_warn("Failed to allocate contiguous pages for size %zx\n", + A->base.size); + + ppgtt = i915_ppgtt_create(gt, 0); + if (IS_ERR(ppgtt)) { + err = PTR_ERR(ppgtt); + goto out_b; + } + + va = i915_vma_instance(A, &ppgtt->vm, NULL); + if (IS_ERR(va)) { + err = PTR_ERR(va); + goto out_vm; + } + + vb = i915_vma_instance(B, &ppgtt->vm, NULL); + if (IS_ERR(vb)) { + err = PTR_ERR(vb); + goto out_vm; + } + + err = 0; + for_each_engine(engine, gt, id) { + struct i915_gem_ww_ctx ww; + struct intel_context *ce; + int bit; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) { + err = PTR_ERR(ce); + break; + } + + i915_vm_put(ce->vm); + ce->vm = i915_vm_get(&ppgtt->vm); + + for_i915_gem_ww(&ww, err, true) + err = intel_context_pin_ww(ce, &ww); + if (err) + goto err_put; + + for_each_set_bit(bit, + (unsigned long *)&RUNTIME_INFO(gt->i915)->page_sizes, + BITS_PER_TYPE(RUNTIME_INFO(gt->i915)->page_sizes)) { + unsigned int len; + + if (BIT_ULL(bit) < i915_vm_obj_min_alignment(va->vm, va->obj)) + continue; + + /* sanitycheck the semaphore wake up */ + err = pte_tlbinv(ce, va, va, + BIT_ULL(bit), + NULL, SZ_4K, + &prng); + if (err) + goto err_unpin; + + for (len = 2; len <= ppgtt_size; len = min(2 * len, ppgtt_size)) { + err = pte_tlbinv(ce, va, vb, + BIT_ULL(bit), + tlbinv, + BIT_ULL(len), + &prng); + if (err) + goto err_unpin; + if (len == ppgtt_size) + break; + } + } +err_unpin: + intel_context_unpin(ce); +err_put: + intel_context_put(ce); + if (err) + break; + } + + if (igt_flush_test(gt->i915)) + err = -EIO; + +out_vm: + i915_vm_put(&ppgtt->vm); +out_b: + i915_gem_object_put(B); +out_a: + i915_gem_object_put(A); + return err; +} + +static void tlbinv_full(struct i915_address_space *vm, u64 addr, u64 length) +{ + intel_gt_invalidate_tlb(vm->gt, intel_gt_tlb_seqno(vm->gt) | 1); +} + +static int invalidate_full(void *arg) +{ + struct intel_gt *gt = arg; + int err; + + if (GRAPHICS_VER(gt->i915) < 8) + return 0; /* TLB invalidate not implemented */ + + err = mem_tlbinv(gt, create_smem, tlbinv_full); + if (err == 0) + err = mem_tlbinv(gt, create_lmem, tlbinv_full); + if (err == -ENODEV || err == -ENXIO) + err = 0; + + return err; +} + +int intel_tlb_live_selftests(struct drm_i915_private 
*i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(invalidate_full), + }; + struct intel_gt *gt; + unsigned int i; + + for_each_gt(gt, i915, i) { + int err; + + if (intel_gt_is_wedged(gt)) + continue; + + err = intel_gt_live_subtests(tests, gt); + if (err) + return err; + } + + return 0; +} diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h index aaf8a380e5c789..5aee6c9a8295ce 100644 --- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h @@ -25,6 +25,7 @@ selftest(gt_lrc, intel_lrc_live_selftests) selftest(gt_mocs, intel_mocs_live_selftests) selftest(gt_pm, intel_gt_pm_live_selftests) selftest(gt_heartbeat, intel_heartbeat_live_selftests) +selftest(gt_tlb, intel_tlb_live_selftests) selftest(requests, i915_request_live_selftests) selftest(migrate, intel_migrate_live_selftests) selftest(active, i915_active_live_selftests)