@@ -111,9 +111,44 @@ void intel_engine_free_request_pool(struct intel_engine_cs *engine)
if (!engine->request_pool)
return;
+ /*
+ * It's safe to free this right away because we always put a fresh
+ * i915_request in the cache that's never been touched by an RCU
+ * reader.
+ */
kmem_cache_free(global.slab_requests, engine->request_pool);
}
+static void __i915_request_free(struct rcu_head *head)
+{
+ struct i915_request *rq = container_of(head, typeof(*rq), fence.rcu);
+
+ kmem_cache_free(global.slab_requests, rq);
+}
+
+static void i915_request_free_rcu(struct i915_request *rq)
+{
+ /*
+ * Because we're on a slab allocator, memory may be re-used the
+ * moment we free it. There is no kfree_rcu() equivalent for
+ * slabs. Instead, we hand-roll it here with call_rcu(). This
+ * gives us all the perf benefits of slab allocation while ensuring
+ * that we never release a request back to the slab until there are
+ * no more readers.
+ *
+ * We do have to be careful, though, when calling kmem_cache_destroy()
+ * as there may be outstanding free requests. This is solved by
+ * inserting an rcu_barrier() before kmem_cache_destroy(). An RCU
+ * barrier is sufficient and we don't need synchronize_rcu()
+ * because the call_rcu() here will wait on any outstanding RCU
+ * readers and the rcu_barrier() will wait on any outstanding
+ * call_rcu() callbacks. So, if there are any readers who once had
+ * valid references to a request, rcu_barrier() will end up waiting
+ * on them by transitivity.
+ */
+ call_rcu(&rq->fence.rcu, __i915_request_free);
+}
+
static void i915_fence_release(struct dma_fence *fence)
{
struct i915_request *rq = to_request(fence);
@@ -127,8 +162,7 @@ static void i915_fence_release(struct dma_fence *fence)
*/
i915_sw_fence_fini(&rq->submit);
i915_sw_fence_fini(&rq->semaphore);
-
- kmem_cache_free(global.slab_requests, rq);
+ i915_request_free_rcu(rq);
}
const struct dma_fence_ops i915_fence_ops = {
@@ -933,35 +967,6 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
*/
ensure_cached_request(&ce->engine->request_pool, gfp);
- /*
- * Beware: Dragons be flying overhead.
- *
- * We use RCU to look up requests in flight. The lookups may
- * race with the request being allocated from the slab freelist.
- * That is the request we are writing to here, may be in the process
- * of being read by __i915_active_request_get_rcu(). As such,
- * we have to be very careful when overwriting the contents. During
- * the RCU lookup, we change chase the request->engine pointer,
- * read the request->global_seqno and increment the reference count.
- *
- * The reference count is incremented atomically. If it is zero,
- * the lookup knows the request is unallocated and complete. Otherwise,
- * it is either still in use, or has been reallocated and reset
- * with dma_fence_init(). This increment is safe for release as we
- * check that the request we have a reference to and matches the active
- * request.
- *
- * Before we increment the refcount, we chase the request->engine
- * pointer. We must not call kmem_cache_zalloc() or else we set
- * that pointer to NULL and cause a crash during the lookup. If
- * we see the request is completed (based on the value of the
- * old engine and seqno), the lookup is complete and reports NULL.
- * If we decide the request is not completed (new engine or seqno),
- * then we grab a reference and double check that it is still the
- * active request - which it won't be and restart the lookup.
- *
- * Do not use kmem_cache_zalloc() here!
- */
rq = kmem_cache_alloc(global.slab_requests,
gfp | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
if (unlikely(!rq)) {
@@ -2116,6 +2121,12 @@ static void i915_global_request_shrink(void)
static void i915_global_request_exit(void)
{
+ /*
+ * We need to rcu_barrier() before destroying slab_requests. See
+ * i915_request_free_rcu() for more details.
+ */
+ rcu_barrier();
+
kmem_cache_destroy(global.slab_execute_cbs);
kmem_cache_destroy(global.slab_requests);
}
@@ -2132,8 +2143,7 @@ int __init i915_global_request_init(void)
sizeof(struct i915_request),
__alignof__(struct i915_request),
SLAB_HWCACHE_ALIGN |
- SLAB_RECLAIM_ACCOUNT |
- SLAB_TYPESAFE_BY_RCU,
+ SLAB_RECLAIM_ACCOUNT,
__i915_request_ctor);
if (!global.slab_requests)
return -ENOMEM;
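
[Editorial note, not part of the patch: the hunks above replace the direct
kmem_cache_free() with a call_rcu() callback and add an rcu_barrier() before
kmem_cache_destroy(). The following is a minimal, self-contained sketch of
that idiom using a hypothetical "foo" cache; none of these names come from
the i915 code, they only illustrate the pattern.]

#include <linux/rcupdate.h>
#include <linux/slab.h>

struct foo {
	struct rcu_head rcu;
	int payload;
};

static struct kmem_cache *foo_cache;

static void foo_free_rcu_cb(struct rcu_head *head)
{
	struct foo *f = container_of(head, struct foo, rcu);

	/* Runs only after a full grace period: no RCU reader can still see f. */
	kmem_cache_free(foo_cache, f);
}

static void foo_free(struct foo *f)
{
	/* Hand-rolled kfree_rcu() for a kmem_cache object. */
	call_rcu(&f->rcu, foo_free_rcu_cb);
}

static void foo_cache_exit(void)
{
	/*
	 * Flush all pending foo_free_rcu_cb() callbacks before tearing down
	 * the cache they free into; rcu_barrier() waits for every
	 * outstanding call_rcu() callback to complete.
	 */
	rcu_barrier();
	kmem_cache_destroy(foo_cache);
}
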
Ever since 0eafec6d3244 ("drm/i915: Enable lockless lookup of request
tracking via RCU"), the i915 driver has used SLAB_TYPESAFE_BY_RCU (it
was called SLAB_DESTROY_BY_RCU at the time) in order to allow RCU on
i915_request.  As nifty as SLAB_TYPESAFE_BY_RCU may be, it comes with
some serious disclaimers.  In particular, objects can get recycled while
RCU readers are still in-flight.  This can be ok if everyone who touches
these objects knows about the disclaimers and is careful.  However,
because we've chosen to use SLAB_TYPESAFE_BY_RCU for i915_request and
because i915_request contains a dma_fence, we've leaked
SLAB_TYPESAFE_BY_RCU and its whole pile of disclaimers to every driver
in the kernel which may consume a dma_fence.

We've tried to keep it somewhat contained by doing most of the hard work
to prevent access to recycled objects via dma_fence_get_rcu_safe().
However, a quick grep of kernel sources says that, of the 30 instances
of dma_fence_get_rcu*, only 11 of them use dma_fence_get_rcu_safe().
It's likely there are bear traps in DRM and related subsystems just
waiting for someone to accidentally step in them.

This commit stops us from using SLAB_TYPESAFE_BY_RCU for i915_request
and, instead, does an RCU-safe slab free via call_rcu().  This should
let us keep most of the perf benefits of slab allocation while avoiding
the bear traps inherent in SLAB_TYPESAFE_BY_RCU.

Signed-off-by: Jason Ekstrand <jason@jlekstrand.net>
Cc: Jon Bloomfield <jon.bloomfield@intel.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Christian König <christian.koenig@amd.com>
Cc: Dave Airlie <airlied@redhat.com>
Cc: Matthew Auld <matthew.auld@intel.com>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_request.c | 76 ++++++++++++++++-------------
 1 file changed, 43 insertions(+), 33 deletions(-)
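
[Editorial note, not part of the patch: to make the "bear trap" concrete,
here is a hedged illustration of an RCU fence lookup against an
RCU-protected fence pointer. The struct obj and both helpers are
hypothetical; only dma_fence_get_rcu() and dma_fence_get_rcu_safe() are
real kernel APIs. Under SLAB_TYPESAFE_BY_RCU the first variant can hand
back a reference to a recycled fence, which is exactly the hazard
dma_fence_get_rcu_safe() guards against by re-checking the pointer after
taking the reference.]

#include <linux/dma-fence.h>
#include <linux/rcupdate.h>

struct obj {
	struct dma_fence __rcu *fence;
};

/*
 * Bare dma_fence_get_rcu(): with SLAB_TYPESAFE_BY_RCU the fence may be
 * freed and recycled between rcu_dereference() and the refcount bump,
 * so the reference returned here can belong to a different fence than
 * the one obj->fence pointed at when we looked.
 */
static struct dma_fence *obj_fence_get_unsafe(struct obj *o)
{
	struct dma_fence *fence;

	rcu_read_lock();
	fence = rcu_dereference(o->fence);
	if (fence)
		fence = dma_fence_get_rcu(fence);
	rcu_read_unlock();

	return fence;
}

/*
 * dma_fence_get_rcu_safe() re-reads the pointer after acquiring the
 * reference and retries if the object was recycled underneath it.
 */
static struct dma_fence *obj_fence_get_safe(struct obj *o)
{
	struct dma_fence *fence;

	rcu_read_lock();
	fence = dma_fence_get_rcu_safe(&o->fence);
	rcu_read_unlock();

	return fence;
}

With the call_rcu()-based free introduced by this patch, an
i915_request's memory is not reused while an RCU reader can still reach
it, so even callers on the plain dma_fence_get_rcu() path can no longer
be handed a recycled request.
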