@@ -255,6 +255,131 @@ static void flink_and_close(void)
close(fd2);
}
+/* Size handed to gem_create() for the buffer objects allocated by this test. */
+#define PAGE_SIZE 4096
+
+/*
+ * Create a GEM object of @size on @fd and store MI_BATCH_BUFFER_END in its
+ * first dword, so that callers can hand the object to execbuf as a batch.
+ *
+ * Returns the handle of the new object. Ownership passes to the caller, who
+ * is expected to release it (the caller in this file uses gem_close()).
+ */
+static uint32_t batch_create_size(int fd, uint64_t size)
+{
+	const uint32_t batch_end = MI_BATCH_BUFFER_END;
+	uint32_t bo = gem_create(fd, size);
+
+	gem_write(fd, bo, 0, &batch_end, sizeof(batch_end));
+
+	return bo;
+}
+
+/*
+ * Placement flags understood by upload():
+ *   IGT_USE_ANY    - pad the object to gem_aperture_size(fd) via
+ *                    EXEC_OBJECT_PAD_TO_SIZE.
+ *   IGT_USE_PINNED - request EXEC_OBJECT_PINNED; the offset is left at 0.
+ */
+#define IGT_USE_ANY 0x1
+#define IGT_USE_PINNED 0x2
+/*
+ * Submit @handle as the only object of an execbuf on context @ctx_id.
+ *
+ * @fd:       DRM file descriptor
+ * @handle:   GEM object to execute
+ * @in_fence: fence fd to wait on before execution; 0 means "no in-fence"
+ * @ctx_id:   context id, stored in execbuf.rsvd1
+ * @flags:    mask of IGT_USE_ANY / IGT_USE_PINNED selecting the placement
+ *
+ * The submission goes through gem_execbuf(); nothing is returned here.
+ */
+static void upload(int fd, uint32_t handle, uint32_t in_fence, uint32_t ctx_id,
+		   unsigned int flags)
+{
+	struct drm_i915_gem_exec_object2 exec[2] = {};
+	struct drm_i915_gem_exec_object2 *obj = &exec[0];
+	struct drm_i915_gem_execbuffer2 execbuf = {};
+
+	/* Describe the single object first ... */
+	obj->handle = handle;
+	obj->flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
+
+	if (flags & IGT_USE_PINNED)
+		obj->flags |= EXEC_OBJECT_PINNED; /* offset = 0 */
+
+	if (flags & IGT_USE_ANY) {
+		obj->flags |= EXEC_OBJECT_PAD_TO_SIZE;
+		obj->pad_to_size = gem_aperture_size(fd);
+	}
+
+	/* ... then the execbuf that references it. */
+	execbuf.buffers_ptr = to_user_pointer(&exec);
+	execbuf.buffer_count = 1;
+	execbuf.rsvd1 = ctx_id;
+
+	if (in_fence) {
+		execbuf.rsvd2 = in_fence;
+		execbuf.flags = I915_EXEC_FENCE_IN;
+	}
+
+	gem_execbuf(fd, &execbuf);
+}
+
+/*
+ * Race the shrinker against GTT eviction on a BO shared by several VMs.
+ *
+ * @flags: IGT_USE_ANY or IGT_USE_PINNED, forwarded to upload() to select how
+ *         each execbuf places its object.
+ *
+ * The function opens its own DRM fd and releases everything it acquired
+ * (spinner, contexts, BO, allocator handle, fd) before returning.
+ */
+static void shrink_vs_evict(unsigned int flags)
+{
+	const unsigned int nproc = sysconf(_SC_NPROCESSORS_ONLN) + 1;
+	const uint64_t timeout_5s = 5LL * NSEC_PER_SEC;
+	int fd = drm_open_driver(DRIVER_INTEL);
+	uint64_t ahnd = get_reloc_ahnd(fd, 0);
+	const intel_ctx_t *ctx_arr[nproc];
+	igt_spin_t *spinner;
+	uint32_t handle1;
+	unsigned int i; /* same signedness as nproc */
+
+	/*
+	 * Try to simulate some nasty object lock contention during GTT
+	 * eviction. Create a BO and bind across several different VMs. Invoke
+	 * the shrinker on that shared BO, followed by triggering GTT eviction
+	 * across all VMs. Both require the object lock to make forward
+	 * progress when trying to unbind the BO, but the shrinker will be
+	 * blocked by the spinner (until killed). Once the spinner is killed
+	 * the shrinker should be able to unbind the object and drop the object
+	 * lock, and GTT eviction should eventually succeed. At no point should
+	 * we see -ENOSPC from the execbuf, even if we can't currently grab the
+	 * object lock.
+	 */
+
+	igt_require(gem_uses_full_ppgtt(fd));
+
+	igt_drop_caches_set(fd, DROP_ALL);
+
+	handle1 = gem_create(fd, PAGE_SIZE);
+
+	spinner = igt_spin_new(fd,
+			       .ahnd = ahnd,
+			       .flags = IGT_SPIN_FENCE_OUT);
+	igt_spin_set_timeout(spinner, timeout_5s);
+
+	/*
+	 * Create several VMs to ensure we don't block on the same vm lock. The
+	 * goal of the test is to ensure that object lock contention doesn't
+	 * somehow result in -ENOSPC from execbuf, if we need to trigger GTT
+	 * eviction.
+	 */
+	for (i = 0; i < nproc; i++) {
+		ctx_arr[i] = intel_ctx_create(fd, NULL);
+
+		/*
+		 * The upper 32 bits of the spinner's rsvd2 are passed as the
+		 * in-fence, so handle1 stays queued behind the spinner in
+		 * every context.
+		 */
+		upload(fd, handle1, spinner->execbuf.rsvd2 >> 32,
+		       ctx_arr[i]->id, flags);
+	}
+
+	/* Kick the shrinker from a separate process. */
+	igt_fork(child, 1) {
+		igt_drop_caches_set(fd, DROP_ALL);
+	}
+
+	sleep(2); /* Give the shrinker time to find handle1 */
+
+	igt_fork(child, nproc) {
+		uint32_t handle2;
+
+		/*
+		 * One of these forks will be stuck on the vm mutex, since the
+		 * shrinker is holding it (along with the object lock) while
+		 * trying to unbind the chosen vma, but is blocked by the
+		 * spinner. The rest should only block waiting to grab the
+		 * object lock for handle1, before then trying to GTT evict it
+		 * from their respective vm. In either case the contention of
+		 * the vm->mutex or object lock should never result in -ENOSPC
+		 * or some other error.
+		 */
+		handle2 = batch_create_size(fd, PAGE_SIZE);
+
+		upload(fd, handle2, 0, ctx_arr[child]->id, flags);
+		gem_close(fd, handle2);
+	}
+
+	igt_waitchildren();
+	igt_spin_free(fd, spinner);
+
+	for (i = 0; i < nproc; i++)
+		intel_ctx_destroy(fd, ctx_arr[i]);
+
+	gem_close(fd, handle1);
+
+	/*
+	 * Release what was acquired at the top of the function: the allocator
+	 * handle from get_reloc_ahnd() and the fd from drm_open_driver().
+	 * Without this every subtest invocation leaks both.
+	 */
+	put_ahnd(ahnd);
+	close(fd);
+}
+
static bool has_contexts(void)
{
bool result;
@@ -331,4 +456,12 @@ igt_main
igt_subtest("flink-and-close-vma-leak")
flink_and_close();
+
+ igt_describe("Regression test to verify GTT eviction can't randomly fail due to object lock contention");
+ igt_subtest_group {
+ igt_subtest("shrink-vs-evict-any")
+ shrink_vs_evict(IGT_USE_ANY);
+ igt_subtest("shrink-vs-evict-pinned")
+ shrink_vs_evict(IGT_USE_PINNED);
+ }
}