diff mbox series

[2/2] dma-fence: Refactor signaling for manual invocation

Message ID 20190508120542.28377-3-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show
Series [1/2] drm/i915: Seal races between async GPU cancellation, retirement and signaling | expand

Commit Message

Chris Wilson May 8, 2019, 12:05 p.m. UTC
Move the duplicated code within dma-fence.c into the header for wider
reuse. In the process apply a small micro-optimisation to only prune the
fence->cb_list once rather than use list_del on every entry.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/dma-buf/Makefile                    |  10 +-
 drivers/dma-buf/dma-fence-trace.c           |  28 +++
 drivers/dma-buf/dma-fence.c                 |  32 +--
 drivers/gpu/drm/i915/gt/intel_breadcrumbs.c |  30 ---
 include/linux/dma-fence-types.h             | 248 +++++++++++++++++++
 include/linux/dma-fence.h                   | 251 +++-----------------
 6 files changed, 321 insertions(+), 278 deletions(-)
 create mode 100644 drivers/dma-buf/dma-fence-trace.c
 create mode 100644 include/linux/dma-fence-types.h

Comments

Chris Wilson May 8, 2019, 12:14 p.m. UTC | #1
Quoting Chris Wilson (2019-05-08 13:05:42)
> Move the duplicated code within dma-fence.c into the header for wider
> reuse. In the process apply a small micro-optimisation to only prune the
> fence->cb_list once rather than use list_del on every entry.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Not yet r-b, just Tvrtko gave a nod of approval that this would be
suitable discussion material.
-Chris
Tvrtko Ursulin May 8, 2019, 12:15 p.m. UTC | #2
On 08/05/2019 13:05, Chris Wilson wrote:
> Move the duplicated code within dma-fence.c into the header for wider
> reuse. In the process apply a small micro-optimisation to only prune the
> fence->cb_list once rather than use list_del on every entry.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

I did not give r-b yet, this was probably a misunderstanding.

I said the general idea sounds right to me, but I was unsure whether to 
go the static inline route, or maybe EXPORT_SYMBOL the decomposed helpers.

Regards,

Tvrtko

> ---
>   drivers/dma-buf/Makefile                    |  10 +-
>   drivers/dma-buf/dma-fence-trace.c           |  28 +++
>   drivers/dma-buf/dma-fence.c                 |  32 +--
>   drivers/gpu/drm/i915/gt/intel_breadcrumbs.c |  30 ---
>   include/linux/dma-fence-types.h             | 248 +++++++++++++++++++
>   include/linux/dma-fence.h                   | 251 +++-----------------
>   6 files changed, 321 insertions(+), 278 deletions(-)
>   create mode 100644 drivers/dma-buf/dma-fence-trace.c
>   create mode 100644 include/linux/dma-fence-types.h
> 
> diff --git a/drivers/dma-buf/Makefile b/drivers/dma-buf/Makefile
> index 1f006e083eb9..56e579878f26 100644
> --- a/drivers/dma-buf/Makefile
> +++ b/drivers/dma-buf/Makefile
> @@ -1,5 +1,11 @@
> -obj-y := dma-buf.o dma-fence.o dma-fence-array.o dma-fence-chain.o \
> -	 reservation.o seqno-fence.o
> +obj-y := \
> +	dma-buf.o \
> +	dma-fence.o \
> +	dma-fence-array.o \
> +	dma-fence-chain.o \
> +	dma-fence-trace.o \
> +	reservation.o \
> +	seqno-fence.o
>   obj-$(CONFIG_SYNC_FILE)		+= sync_file.o
>   obj-$(CONFIG_SW_SYNC)		+= sw_sync.o sync_debug.o
>   obj-$(CONFIG_UDMABUF)		+= udmabuf.o
> diff --git a/drivers/dma-buf/dma-fence-trace.c b/drivers/dma-buf/dma-fence-trace.c
> new file mode 100644
> index 000000000000..eb6f282be4c0
> --- /dev/null
> +++ b/drivers/dma-buf/dma-fence-trace.c
> @@ -0,0 +1,28 @@
> +/*
> + * Fence mechanism for dma-buf and to allow for asynchronous dma access
> + *
> + * Copyright (C) 2012 Canonical Ltd
> + * Copyright (C) 2012 Texas Instruments
> + *
> + * Authors:
> + * Rob Clark <robdclark@gmail.com>
> + * Maarten Lankhorst <maarten.lankhorst@canonical.com>
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms of the GNU General Public License version 2 as published by
> + * the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> + * more details.
> + */
> +
> +#include <linux/dma-fence-types.h>
> +
> +#define CREATE_TRACE_POINTS
> +#include <trace/events/dma_fence.h>
> +
> +EXPORT_TRACEPOINT_SYMBOL(dma_fence_emit);
> +EXPORT_TRACEPOINT_SYMBOL(dma_fence_enable_signal);
> +EXPORT_TRACEPOINT_SYMBOL(dma_fence_signaled);
> diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
> index 9bf06042619a..8196a179fdc2 100644
> --- a/drivers/dma-buf/dma-fence.c
> +++ b/drivers/dma-buf/dma-fence.c
> @@ -24,13 +24,6 @@
>   #include <linux/dma-fence.h>
>   #include <linux/sched/signal.h>
>   
> -#define CREATE_TRACE_POINTS
> -#include <trace/events/dma_fence.h>
> -
> -EXPORT_TRACEPOINT_SYMBOL(dma_fence_emit);
> -EXPORT_TRACEPOINT_SYMBOL(dma_fence_enable_signal);
> -EXPORT_TRACEPOINT_SYMBOL(dma_fence_signaled);
> -
>   static DEFINE_SPINLOCK(dma_fence_stub_lock);
>   static struct dma_fence dma_fence_stub;
>   
> @@ -136,7 +129,6 @@ EXPORT_SYMBOL(dma_fence_context_alloc);
>    */
>   int dma_fence_signal_locked(struct dma_fence *fence)
>   {
> -	struct dma_fence_cb *cur, *tmp;
>   	int ret = 0;
>   
>   	lockdep_assert_held(fence->lock);
> @@ -144,7 +136,7 @@ int dma_fence_signal_locked(struct dma_fence *fence)
>   	if (WARN_ON(!fence))
>   		return -EINVAL;
>   
> -	if (test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) {
> +	if (!__dma_fence_signal(fence)) {
>   		ret = -EINVAL;
>   
>   		/*
> @@ -152,15 +144,10 @@ int dma_fence_signal_locked(struct dma_fence *fence)
>   		 * still run through all callbacks
>   		 */
>   	} else {
> -		fence->timestamp = ktime_get();
> -		set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags);
> -		trace_dma_fence_signaled(fence);
> +		__dma_fence_signal__timestamp(fence, ktime_get());
>   	}
>   
> -	list_for_each_entry_safe(cur, tmp, &fence->cb_list, node) {
> -		list_del_init(&cur->node);
> -		cur->func(fence, cur);
> -	}
> +	__dma_fence_signal__notify(fence);
>   	return ret;
>   }
>   EXPORT_SYMBOL(dma_fence_signal_locked);
> @@ -185,21 +172,14 @@ int dma_fence_signal(struct dma_fence *fence)
>   	if (!fence)
>   		return -EINVAL;
>   
> -	if (test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
> +	if (!__dma_fence_signal(fence))
>   		return -EINVAL;
>   
> -	fence->timestamp = ktime_get();
> -	set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags);
> -	trace_dma_fence_signaled(fence);
> +	__dma_fence_signal__timestamp(fence, ktime_get());
>   
>   	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &fence->flags)) {
> -		struct dma_fence_cb *cur, *tmp;
> -
>   		spin_lock_irqsave(fence->lock, flags);
> -		list_for_each_entry_safe(cur, tmp, &fence->cb_list, node) {
> -			list_del_init(&cur->node);
> -			cur->func(fence, cur);
> -		}
> +		__dma_fence_signal__notify(fence);
>   		spin_unlock_irqrestore(fence->lock, flags);
>   	}
>   	return 0;
> diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
> index c092bdf5f0bf..d1f8572100c3 100644
> --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
> @@ -23,7 +23,6 @@
>    */
>   
>   #include <linux/kthread.h>
> -#include <trace/events/dma_fence.h>
>   #include <uapi/linux/sched/types.h>
>   
>   #include "i915_drv.h"
> @@ -97,35 +96,6 @@ check_signal_order(struct intel_context *ce, struct i915_request *rq)
>   	return true;
>   }
>   
> -static bool
> -__dma_fence_signal(struct dma_fence *fence)
> -{
> -	return !test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags);
> -}
> -
> -static void
> -__dma_fence_signal__timestamp(struct dma_fence *fence, ktime_t timestamp)
> -{
> -	fence->timestamp = timestamp;
> -	set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags);
> -	trace_dma_fence_signaled(fence);
> -}
> -
> -static void
> -__dma_fence_signal__notify(struct dma_fence *fence)
> -{
> -	struct dma_fence_cb *cur, *tmp;
> -
> -	lockdep_assert_held(fence->lock);
> -	lockdep_assert_irqs_disabled();
> -
> -	list_for_each_entry_safe(cur, tmp, &fence->cb_list, node) {
> -		INIT_LIST_HEAD(&cur->node);
> -		cur->func(fence, cur);
> -	}
> -	INIT_LIST_HEAD(&fence->cb_list);
> -}
> -
>   void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
>   {
>   	struct intel_breadcrumbs *b = &engine->breadcrumbs;
> diff --git a/include/linux/dma-fence-types.h b/include/linux/dma-fence-types.h
> new file mode 100644
> index 000000000000..18e7511c0eed
> --- /dev/null
> +++ b/include/linux/dma-fence-types.h
> @@ -0,0 +1,248 @@
> +/*
> + * Fence mechanism for dma-buf to allow for asynchronous dma access
> + *
> + * Copyright (C) 2012 Canonical Ltd
> + * Copyright (C) 2012 Texas Instruments
> + *
> + * Authors:
> + * Rob Clark <robdclark@gmail.com>
> + * Maarten Lankhorst <maarten.lankhorst@canonical.com>
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms of the GNU General Public License version 2 as published by
> + * the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> + * more details.
> + */
> +
> +#ifndef __LINUX_DMA_FENCE_TYPES_H
> +#define __LINUX_DMA_FENCE_TYPES_H
> +
> +#include <linux/kref.h>
> +#include <linux/ktime.h>
> +
> +struct dma_fence;
> +struct dma_fence_ops;
> +struct dma_fence_cb;
> +
> +/**
> + * struct dma_fence - software synchronization primitive
> + * @refcount: refcount for this fence
> + * @ops: dma_fence_ops associated with this fence
> + * @rcu: used for releasing fence with kfree_rcu
> + * @cb_list: list of all callbacks to call
> + * @lock: spin_lock_irqsave used for locking
> + * @context: execution context this fence belongs to, returned by
> + *           dma_fence_context_alloc()
> + * @seqno: the sequence number of this fence inside the execution context,
> + * can be compared to decide which fence would be signaled later.
> + * @flags: A mask of DMA_FENCE_FLAG_* defined below
> + * @timestamp: Timestamp when the fence was signaled.
> + * @error: Optional, only valid if < 0, must be set before calling
> + * dma_fence_signal, indicates that the fence has completed with an error.
> + *
> + * the flags member must be manipulated and read using the appropriate
> + * atomic ops (bit_*), so taking the spinlock will not be needed most
> + * of the time.
> + *
> + * DMA_FENCE_FLAG_SIGNALED_BIT - fence is already signaled
> + * DMA_FENCE_FLAG_TIMESTAMP_BIT - timestamp recorded for fence signaling
> + * DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT - enable_signaling might have been called
> + * DMA_FENCE_FLAG_USER_BITS - start of the unused bits, can be used by the
> + * implementer of the fence for its own purposes. Can be used in different
> + * ways by different fence implementers, so do not rely on this.
> + *
> + * Since atomic bitops are used, this is not guaranteed to be the case.
> + * Particularly, if the bit was set, but dma_fence_signal was called right
> + * before this bit was set, it would have been able to set the
> + * DMA_FENCE_FLAG_SIGNALED_BIT, before enable_signaling was called.
> + * Adding a check for DMA_FENCE_FLAG_SIGNALED_BIT after setting
> + * DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT closes this race, and makes sure that
> + * after dma_fence_signal was called, any enable_signaling call will have either
> + * been completed, or never called at all.
> + */
> +struct dma_fence {
> +	struct kref refcount;
> +	const struct dma_fence_ops *ops;
> +	struct rcu_head rcu;
> +	struct list_head cb_list;
> +	spinlock_t *lock;
> +	u64 context;
> +	u64 seqno;
> +	unsigned long flags;
> +	ktime_t timestamp;
> +	int error;
> +};
> +
> +enum dma_fence_flag_bits {
> +	DMA_FENCE_FLAG_SIGNALED_BIT,
> +	DMA_FENCE_FLAG_TIMESTAMP_BIT,
> +	DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
> +	DMA_FENCE_FLAG_USER_BITS, /* must always be last member */
> +};
> +
> +typedef void (*dma_fence_func_t)(struct dma_fence *fence,
> +				 struct dma_fence_cb *cb);
> +
> +/**
> + * struct dma_fence_cb - callback for dma_fence_add_callback()
> + * @node: used by dma_fence_add_callback() to append this struct to fence::cb_list
> + * @func: dma_fence_func_t to call
> + *
> + * This struct will be initialized by dma_fence_add_callback(), additional
> + * data can be passed along by embedding dma_fence_cb in another struct.
> + */
> +struct dma_fence_cb {
> +	struct list_head node;
> +	dma_fence_func_t func;
> +};
> +
> +/**
> + * struct dma_fence_ops - operations implemented for fence
> + *
> + */
> +struct dma_fence_ops {
> +	/**
> +	 * @use_64bit_seqno:
> +	 *
> +	 * True if this dma_fence implementation uses 64bit seqno, false
> +	 * otherwise.
> +	 */
> +	bool use_64bit_seqno;
> +
> +	/**
> +	 * @get_driver_name:
> +	 *
> +	 * Returns the driver name. This is a callback to allow drivers to
> +	 * compute the name at runtime, without having it to store permanently
> +	 * for each fence, or build a cache of some sort.
> +	 *
> +	 * This callback is mandatory.
> +	 */
> +	const char * (*get_driver_name)(struct dma_fence *fence);
> +
> +	/**
> +	 * @get_timeline_name:
> +	 *
> +	 * Return the name of the context this fence belongs to. This is a
> +	 * callback to allow drivers to compute the name at runtime, without
> +	 * having it to store permanently for each fence, or build a cache of
> +	 * some sort.
> +	 *
> +	 * This callback is mandatory.
> +	 */
> +	const char * (*get_timeline_name)(struct dma_fence *fence);
> +
> +	/**
> +	 * @enable_signaling:
> +	 *
> +	 * Enable software signaling of fence.
> +	 *
> +	 * For fence implementations that have the capability for hw->hw
> +	 * signaling, they can implement this op to enable the necessary
> +	 * interrupts, or insert commands into cmdstream, etc, to avoid these
> +	 * costly operations for the common case where only hw->hw
> +	 * synchronization is required.  This is called in the first
> +	 * dma_fence_wait() or dma_fence_add_callback() path to let the fence
> +	 * implementation know that there is another driver waiting on the
> +	 * signal (ie. hw->sw case).
> +	 *
> +	 * This function can be called from atomic context, but not
> +	 * from irq context, so normal spinlocks can be used.
> +	 *
> +	 * A return value of false indicates the fence already passed,
> +	 * or some failure occurred that made it impossible to enable
> +	 * signaling. True indicates successful enabling.
> +	 *
> +	 * &dma_fence.error may be set in enable_signaling, but only when false
> +	 * is returned.
> +	 *
> +	 * Since many implementations can call dma_fence_signal() even when before
> +	 * @enable_signaling has been called there's a race window, where the
> +	 * dma_fence_signal() might result in the final fence reference being
> +	 * released and its memory freed. To avoid this, implementations of this
> +	 * callback should grab their own reference using dma_fence_get(), to be
> +	 * released when the fence is signalled (through e.g. the interrupt
> +	 * handler).
> +	 *
> +	 * This callback is optional. If this callback is not present, then the
> +	 * driver must always have signaling enabled.
> +	 */
> +	bool (*enable_signaling)(struct dma_fence *fence);
> +
> +	/**
> +	 * @signaled:
> +	 *
> +	 * Peek whether the fence is signaled, as a fastpath optimization for
> +	 * e.g. dma_fence_wait() or dma_fence_add_callback(). Note that this
> +	 * callback does not need to make any guarantees beyond that a fence
> +	 * once indicates as signalled must always return true from this
> +	 * callback. This callback may return false even if the fence has
> +	 * completed already, in this case information hasn't propogated throug
> +	 * the system yet. See also dma_fence_is_signaled().
> +	 *
> +	 * May set &dma_fence.error if returning true.
> +	 *
> +	 * This callback is optional.
> +	 */
> +	bool (*signaled)(struct dma_fence *fence);
> +
> +	/**
> +	 * @wait:
> +	 *
> +	 * Custom wait implementation, defaults to dma_fence_default_wait() if
> +	 * not set.
> +	 *
> +	 * The dma_fence_default_wait implementation should work for any fence, as long
> +	 * as @enable_signaling works correctly. This hook allows drivers to
> +	 * have an optimized version for the case where a process context is
> +	 * already available, e.g. if @enable_signaling for the general case
> +	 * needs to set up a worker thread.
> +	 *
> +	 * Must return -ERESTARTSYS if the wait is intr = true and the wait was
> +	 * interrupted, and remaining jiffies if fence has signaled, or 0 if wait
> +	 * timed out. Can also return other error values on custom implementations,
> +	 * which should be treated as if the fence is signaled. For example a hardware
> +	 * lockup could be reported like that.
> +	 *
> +	 * This callback is optional.
> +	 */
> +	signed long (*wait)(struct dma_fence *fence,
> +			    bool intr, signed long timeout);
> +
> +	/**
> +	 * @release:
> +	 *
> +	 * Called on destruction of fence to release additional resources.
> +	 * Can be called from irq context.  This callback is optional. If it is
> +	 * NULL, then dma_fence_free() is instead called as the default
> +	 * implementation.
> +	 */
> +	void (*release)(struct dma_fence *fence);
> +
> +	/**
> +	 * @fence_value_str:
> +	 *
> +	 * Callback to fill in free-form debug info specific to this fence, like
> +	 * the sequence number.
> +	 *
> +	 * This callback is optional.
> +	 */
> +	void (*fence_value_str)(struct dma_fence *fence, char *str, int size);
> +
> +	/**
> +	 * @timeline_value_str:
> +	 *
> +	 * Fills in the current value of the timeline as a string, like the
> +	 * sequence number. Note that the specific fence passed to this function
> +	 * should not matter, drivers should only use it to look up the
> +	 * corresponding timeline structures.
> +	 */
> +	void (*timeline_value_str)(struct dma_fence *fence,
> +				   char *str, int size);
> +};
> +
> +#endif /* __LINUX_DMA_FENCE_TYPES_H */
> diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
> index 974717d6ac0c..142eb67e695f 100644
> --- a/include/linux/dma-fence.h
> +++ b/include/linux/dma-fence.h
> @@ -21,6 +21,7 @@
>   #ifndef __LINUX_DMA_FENCE_H
>   #define __LINUX_DMA_FENCE_H
>   
> +#include <linux/dma-fence-types.h>
>   #include <linux/err.h>
>   #include <linux/wait.h>
>   #include <linux/list.h>
> @@ -30,226 +31,7 @@
>   #include <linux/printk.h>
>   #include <linux/rcupdate.h>
>   
> -struct dma_fence;
> -struct dma_fence_ops;
> -struct dma_fence_cb;
> -
> -/**
> - * struct dma_fence - software synchronization primitive
> - * @refcount: refcount for this fence
> - * @ops: dma_fence_ops associated with this fence
> - * @rcu: used for releasing fence with kfree_rcu
> - * @cb_list: list of all callbacks to call
> - * @lock: spin_lock_irqsave used for locking
> - * @context: execution context this fence belongs to, returned by
> - *           dma_fence_context_alloc()
> - * @seqno: the sequence number of this fence inside the execution context,
> - * can be compared to decide which fence would be signaled later.
> - * @flags: A mask of DMA_FENCE_FLAG_* defined below
> - * @timestamp: Timestamp when the fence was signaled.
> - * @error: Optional, only valid if < 0, must be set before calling
> - * dma_fence_signal, indicates that the fence has completed with an error.
> - *
> - * the flags member must be manipulated and read using the appropriate
> - * atomic ops (bit_*), so taking the spinlock will not be needed most
> - * of the time.
> - *
> - * DMA_FENCE_FLAG_SIGNALED_BIT - fence is already signaled
> - * DMA_FENCE_FLAG_TIMESTAMP_BIT - timestamp recorded for fence signaling
> - * DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT - enable_signaling might have been called
> - * DMA_FENCE_FLAG_USER_BITS - start of the unused bits, can be used by the
> - * implementer of the fence for its own purposes. Can be used in different
> - * ways by different fence implementers, so do not rely on this.
> - *
> - * Since atomic bitops are used, this is not guaranteed to be the case.
> - * Particularly, if the bit was set, but dma_fence_signal was called right
> - * before this bit was set, it would have been able to set the
> - * DMA_FENCE_FLAG_SIGNALED_BIT, before enable_signaling was called.
> - * Adding a check for DMA_FENCE_FLAG_SIGNALED_BIT after setting
> - * DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT closes this race, and makes sure that
> - * after dma_fence_signal was called, any enable_signaling call will have either
> - * been completed, or never called at all.
> - */
> -struct dma_fence {
> -	struct kref refcount;
> -	const struct dma_fence_ops *ops;
> -	struct rcu_head rcu;
> -	struct list_head cb_list;
> -	spinlock_t *lock;
> -	u64 context;
> -	u64 seqno;
> -	unsigned long flags;
> -	ktime_t timestamp;
> -	int error;
> -};
> -
> -enum dma_fence_flag_bits {
> -	DMA_FENCE_FLAG_SIGNALED_BIT,
> -	DMA_FENCE_FLAG_TIMESTAMP_BIT,
> -	DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
> -	DMA_FENCE_FLAG_USER_BITS, /* must always be last member */
> -};
> -
> -typedef void (*dma_fence_func_t)(struct dma_fence *fence,
> -				 struct dma_fence_cb *cb);
> -
> -/**
> - * struct dma_fence_cb - callback for dma_fence_add_callback()
> - * @node: used by dma_fence_add_callback() to append this struct to fence::cb_list
> - * @func: dma_fence_func_t to call
> - *
> - * This struct will be initialized by dma_fence_add_callback(), additional
> - * data can be passed along by embedding dma_fence_cb in another struct.
> - */
> -struct dma_fence_cb {
> -	struct list_head node;
> -	dma_fence_func_t func;
> -};
> -
> -/**
> - * struct dma_fence_ops - operations implemented for fence
> - *
> - */
> -struct dma_fence_ops {
> -	/**
> -	 * @use_64bit_seqno:
> -	 *
> -	 * True if this dma_fence implementation uses 64bit seqno, false
> -	 * otherwise.
> -	 */
> -	bool use_64bit_seqno;
> -
> -	/**
> -	 * @get_driver_name:
> -	 *
> -	 * Returns the driver name. This is a callback to allow drivers to
> -	 * compute the name at runtime, without having it to store permanently
> -	 * for each fence, or build a cache of some sort.
> -	 *
> -	 * This callback is mandatory.
> -	 */
> -	const char * (*get_driver_name)(struct dma_fence *fence);
> -
> -	/**
> -	 * @get_timeline_name:
> -	 *
> -	 * Return the name of the context this fence belongs to. This is a
> -	 * callback to allow drivers to compute the name at runtime, without
> -	 * having it to store permanently for each fence, or build a cache of
> -	 * some sort.
> -	 *
> -	 * This callback is mandatory.
> -	 */
> -	const char * (*get_timeline_name)(struct dma_fence *fence);
> -
> -	/**
> -	 * @enable_signaling:
> -	 *
> -	 * Enable software signaling of fence.
> -	 *
> -	 * For fence implementations that have the capability for hw->hw
> -	 * signaling, they can implement this op to enable the necessary
> -	 * interrupts, or insert commands into cmdstream, etc, to avoid these
> -	 * costly operations for the common case where only hw->hw
> -	 * synchronization is required.  This is called in the first
> -	 * dma_fence_wait() or dma_fence_add_callback() path to let the fence
> -	 * implementation know that there is another driver waiting on the
> -	 * signal (ie. hw->sw case).
> -	 *
> -	 * This function can be called from atomic context, but not
> -	 * from irq context, so normal spinlocks can be used.
> -	 *
> -	 * A return value of false indicates the fence already passed,
> -	 * or some failure occurred that made it impossible to enable
> -	 * signaling. True indicates successful enabling.
> -	 *
> -	 * &dma_fence.error may be set in enable_signaling, but only when false
> -	 * is returned.
> -	 *
> -	 * Since many implementations can call dma_fence_signal() even when before
> -	 * @enable_signaling has been called there's a race window, where the
> -	 * dma_fence_signal() might result in the final fence reference being
> -	 * released and its memory freed. To avoid this, implementations of this
> -	 * callback should grab their own reference using dma_fence_get(), to be
> -	 * released when the fence is signalled (through e.g. the interrupt
> -	 * handler).
> -	 *
> -	 * This callback is optional. If this callback is not present, then the
> -	 * driver must always have signaling enabled.
> -	 */
> -	bool (*enable_signaling)(struct dma_fence *fence);
> -
> -	/**
> -	 * @signaled:
> -	 *
> -	 * Peek whether the fence is signaled, as a fastpath optimization for
> -	 * e.g. dma_fence_wait() or dma_fence_add_callback(). Note that this
> -	 * callback does not need to make any guarantees beyond that a fence
> -	 * once indicates as signalled must always return true from this
> -	 * callback. This callback may return false even if the fence has
> -	 * completed already, in this case information hasn't propogated throug
> -	 * the system yet. See also dma_fence_is_signaled().
> -	 *
> -	 * May set &dma_fence.error if returning true.
> -	 *
> -	 * This callback is optional.
> -	 */
> -	bool (*signaled)(struct dma_fence *fence);
> -
> -	/**
> -	 * @wait:
> -	 *
> -	 * Custom wait implementation, defaults to dma_fence_default_wait() if
> -	 * not set.
> -	 *
> -	 * The dma_fence_default_wait implementation should work for any fence, as long
> -	 * as @enable_signaling works correctly. This hook allows drivers to
> -	 * have an optimized version for the case where a process context is
> -	 * already available, e.g. if @enable_signaling for the general case
> -	 * needs to set up a worker thread.
> -	 *
> -	 * Must return -ERESTARTSYS if the wait is intr = true and the wait was
> -	 * interrupted, and remaining jiffies if fence has signaled, or 0 if wait
> -	 * timed out. Can also return other error values on custom implementations,
> -	 * which should be treated as if the fence is signaled. For example a hardware
> -	 * lockup could be reported like that.
> -	 *
> -	 * This callback is optional.
> -	 */
> -	signed long (*wait)(struct dma_fence *fence,
> -			    bool intr, signed long timeout);
> -
> -	/**
> -	 * @release:
> -	 *
> -	 * Called on destruction of fence to release additional resources.
> -	 * Can be called from irq context.  This callback is optional. If it is
> -	 * NULL, then dma_fence_free() is instead called as the default
> -	 * implementation.
> -	 */
> -	void (*release)(struct dma_fence *fence);
> -
> -	/**
> -	 * @fence_value_str:
> -	 *
> -	 * Callback to fill in free-form debug info specific to this fence, like
> -	 * the sequence number.
> -	 *
> -	 * This callback is optional.
> -	 */
> -	void (*fence_value_str)(struct dma_fence *fence, char *str, int size);
> -
> -	/**
> -	 * @timeline_value_str:
> -	 *
> -	 * Fills in the current value of the timeline as a string, like the
> -	 * sequence number. Note that the specific fence passed to this function
> -	 * should not matter, drivers should only use it to look up the
> -	 * corresponding timeline structures.
> -	 */
> -	void (*timeline_value_str)(struct dma_fence *fence,
> -				   char *str, int size);
> -};
> +#include <trace/events/dma_fence.h>
>   
>   void dma_fence_init(struct dma_fence *fence, const struct dma_fence_ops *ops,
>   		    spinlock_t *lock, u64 context, u64 seqno);
> @@ -561,6 +343,35 @@ static inline signed long dma_fence_wait(struct dma_fence *fence, bool intr)
>   struct dma_fence *dma_fence_get_stub(void);
>   u64 dma_fence_context_alloc(unsigned num);
>   
> +static inline bool
> +__dma_fence_signal(struct dma_fence *fence)
> +{
> +	return !test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags);
> +}
> +
> +static inline void
> +__dma_fence_signal__timestamp(struct dma_fence *fence, ktime_t timestamp)
> +{
> +	fence->timestamp = timestamp;
> +	set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags);
> +	trace_dma_fence_signaled(fence);
> +}
> +
> +static inline void
> +__dma_fence_signal__notify(struct dma_fence *fence)
> +{
> +	struct dma_fence_cb *cur, *tmp;
> +
> +	lockdep_assert_held(fence->lock);
> +	lockdep_assert_irqs_disabled();
> +
> +	list_for_each_entry_safe(cur, tmp, &fence->cb_list, node) {
> +		INIT_LIST_HEAD(&cur->node);
> +		cur->func(fence, cur);
> +	}
> +	INIT_LIST_HEAD(&fence->cb_list);
> +}
> +
>   #define DMA_FENCE_TRACE(f, fmt, args...) \
>   	do {								\
>   		struct dma_fence *__ff = (f);				\
>
Daniel Vetter May 8, 2019, 12:40 p.m. UTC | #3
On Wed, May 8, 2019 at 2:05 PM Chris Wilson <chris@chris-wilson.co.uk> wrote:
>
> Move the duplicated code within dma-fence.c into the header for wider
> reuse. In the process apply a small micro-optimisation to only prune the
> fence->cb_list once rather than use list_del on every entry.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

I have no opinion on the change itself, but spotted two things while
trying to understand what's going on:

- Please update Documentation/driver-api/dma-buf.rst to keep the
kerneldoc in the newly extracted file included.

- The DMA_FENCE_FLAG_TIMESTAMP_BIT trickery added in 76250f2b743b7
seems to have lost the memory barriers in the process. I think we need
to re-add them. Altough looking at the old code we lacked them on the
reader side since forever :-/

Cheers, Daniel

> ---
>  drivers/dma-buf/Makefile                    |  10 +-
>  drivers/dma-buf/dma-fence-trace.c           |  28 +++
>  drivers/dma-buf/dma-fence.c                 |  32 +--
>  drivers/gpu/drm/i915/gt/intel_breadcrumbs.c |  30 ---
>  include/linux/dma-fence-types.h             | 248 +++++++++++++++++++
>  include/linux/dma-fence.h                   | 251 +++-----------------
>  6 files changed, 321 insertions(+), 278 deletions(-)
>  create mode 100644 drivers/dma-buf/dma-fence-trace.c
>  create mode 100644 include/linux/dma-fence-types.h
>
> diff --git a/drivers/dma-buf/Makefile b/drivers/dma-buf/Makefile
> index 1f006e083eb9..56e579878f26 100644
> --- a/drivers/dma-buf/Makefile
> +++ b/drivers/dma-buf/Makefile
> @@ -1,5 +1,11 @@
> -obj-y := dma-buf.o dma-fence.o dma-fence-array.o dma-fence-chain.o \
> -        reservation.o seqno-fence.o
> +obj-y := \
> +       dma-buf.o \
> +       dma-fence.o \
> +       dma-fence-array.o \
> +       dma-fence-chain.o \
> +       dma-fence-trace.o \
> +       reservation.o \
> +       seqno-fence.o
>  obj-$(CONFIG_SYNC_FILE)                += sync_file.o
>  obj-$(CONFIG_SW_SYNC)          += sw_sync.o sync_debug.o
>  obj-$(CONFIG_UDMABUF)          += udmabuf.o
> diff --git a/drivers/dma-buf/dma-fence-trace.c b/drivers/dma-buf/dma-fence-trace.c
> new file mode 100644
> index 000000000000..eb6f282be4c0
> --- /dev/null
> +++ b/drivers/dma-buf/dma-fence-trace.c
> @@ -0,0 +1,28 @@
> +/*
> + * Fence mechanism for dma-buf and to allow for asynchronous dma access
> + *
> + * Copyright (C) 2012 Canonical Ltd
> + * Copyright (C) 2012 Texas Instruments
> + *
> + * Authors:
> + * Rob Clark <robdclark@gmail.com>
> + * Maarten Lankhorst <maarten.lankhorst@canonical.com>
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms of the GNU General Public License version 2 as published by
> + * the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> + * more details.
> + */
> +
> +#include <linux/dma-fence-types.h>
> +
> +#define CREATE_TRACE_POINTS
> +#include <trace/events/dma_fence.h>
> +
> +EXPORT_TRACEPOINT_SYMBOL(dma_fence_emit);
> +EXPORT_TRACEPOINT_SYMBOL(dma_fence_enable_signal);
> +EXPORT_TRACEPOINT_SYMBOL(dma_fence_signaled);
> diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
> index 9bf06042619a..8196a179fdc2 100644
> --- a/drivers/dma-buf/dma-fence.c
> +++ b/drivers/dma-buf/dma-fence.c
> @@ -24,13 +24,6 @@
>  #include <linux/dma-fence.h>
>  #include <linux/sched/signal.h>
>
> -#define CREATE_TRACE_POINTS
> -#include <trace/events/dma_fence.h>
> -
> -EXPORT_TRACEPOINT_SYMBOL(dma_fence_emit);
> -EXPORT_TRACEPOINT_SYMBOL(dma_fence_enable_signal);
> -EXPORT_TRACEPOINT_SYMBOL(dma_fence_signaled);
> -
>  static DEFINE_SPINLOCK(dma_fence_stub_lock);
>  static struct dma_fence dma_fence_stub;
>
> @@ -136,7 +129,6 @@ EXPORT_SYMBOL(dma_fence_context_alloc);
>   */
>  int dma_fence_signal_locked(struct dma_fence *fence)
>  {
> -       struct dma_fence_cb *cur, *tmp;
>         int ret = 0;
>
>         lockdep_assert_held(fence->lock);
> @@ -144,7 +136,7 @@ int dma_fence_signal_locked(struct dma_fence *fence)
>         if (WARN_ON(!fence))
>                 return -EINVAL;
>
> -       if (test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) {
> +       if (!__dma_fence_signal(fence)) {
>                 ret = -EINVAL;
>
>                 /*
> @@ -152,15 +144,10 @@ int dma_fence_signal_locked(struct dma_fence *fence)
>                  * still run through all callbacks
>                  */
>         } else {
> -               fence->timestamp = ktime_get();
> -               set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags);
> -               trace_dma_fence_signaled(fence);
> +               __dma_fence_signal__timestamp(fence, ktime_get());
>         }
>
> -       list_for_each_entry_safe(cur, tmp, &fence->cb_list, node) {
> -               list_del_init(&cur->node);
> -               cur->func(fence, cur);
> -       }
> +       __dma_fence_signal__notify(fence);
>         return ret;
>  }
>  EXPORT_SYMBOL(dma_fence_signal_locked);
> @@ -185,21 +172,14 @@ int dma_fence_signal(struct dma_fence *fence)
>         if (!fence)
>                 return -EINVAL;
>
> -       if (test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
> +       if (!__dma_fence_signal(fence))
>                 return -EINVAL;
>
> -       fence->timestamp = ktime_get();
> -       set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags);
> -       trace_dma_fence_signaled(fence);
> +       __dma_fence_signal__timestamp(fence, ktime_get());
>
>         if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &fence->flags)) {
> -               struct dma_fence_cb *cur, *tmp;
> -
>                 spin_lock_irqsave(fence->lock, flags);
> -               list_for_each_entry_safe(cur, tmp, &fence->cb_list, node) {
> -                       list_del_init(&cur->node);
> -                       cur->func(fence, cur);
> -               }
> +               __dma_fence_signal__notify(fence);
>                 spin_unlock_irqrestore(fence->lock, flags);
>         }
>         return 0;
> diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
> index c092bdf5f0bf..d1f8572100c3 100644
> --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
> @@ -23,7 +23,6 @@
>   */
>
>  #include <linux/kthread.h>
> -#include <trace/events/dma_fence.h>
>  #include <uapi/linux/sched/types.h>
>
>  #include "i915_drv.h"
> @@ -97,35 +96,6 @@ check_signal_order(struct intel_context *ce, struct i915_request *rq)
>         return true;
>  }
>
> -static bool
> -__dma_fence_signal(struct dma_fence *fence)
> -{
> -       return !test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags);
> -}
> -
> -static void
> -__dma_fence_signal__timestamp(struct dma_fence *fence, ktime_t timestamp)
> -{
> -       fence->timestamp = timestamp;
> -       set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags);
> -       trace_dma_fence_signaled(fence);
> -}
> -
> -static void
> -__dma_fence_signal__notify(struct dma_fence *fence)
> -{
> -       struct dma_fence_cb *cur, *tmp;
> -
> -       lockdep_assert_held(fence->lock);
> -       lockdep_assert_irqs_disabled();
> -
> -       list_for_each_entry_safe(cur, tmp, &fence->cb_list, node) {
> -               INIT_LIST_HEAD(&cur->node);
> -               cur->func(fence, cur);
> -       }
> -       INIT_LIST_HEAD(&fence->cb_list);
> -}
> -
>  void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
>  {
>         struct intel_breadcrumbs *b = &engine->breadcrumbs;
> diff --git a/include/linux/dma-fence-types.h b/include/linux/dma-fence-types.h
> new file mode 100644
> index 000000000000..18e7511c0eed
> --- /dev/null
> +++ b/include/linux/dma-fence-types.h
> @@ -0,0 +1,248 @@
> +/*
> + * Fence mechanism for dma-buf to allow for asynchronous dma access
> + *
> + * Copyright (C) 2012 Canonical Ltd
> + * Copyright (C) 2012 Texas Instruments
> + *
> + * Authors:
> + * Rob Clark <robdclark@gmail.com>
> + * Maarten Lankhorst <maarten.lankhorst@canonical.com>
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms of the GNU General Public License version 2 as published by
> + * the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> + * more details.
> + */
> +
> +#ifndef __LINUX_DMA_FENCE_TYPES_H
> +#define __LINUX_DMA_FENCE_TYPES_H
> +
> +#include <linux/kref.h>
> +#include <linux/ktime.h>
> +
> +struct dma_fence;
> +struct dma_fence_ops;
> +struct dma_fence_cb;
> +
> +/**
> + * struct dma_fence - software synchronization primitive
> + * @refcount: refcount for this fence
> + * @ops: dma_fence_ops associated with this fence
> + * @rcu: used for releasing fence with kfree_rcu
> + * @cb_list: list of all callbacks to call
> + * @lock: spin_lock_irqsave used for locking
> + * @context: execution context this fence belongs to, returned by
> + *           dma_fence_context_alloc()
> + * @seqno: the sequence number of this fence inside the execution context,
> + * can be compared to decide which fence would be signaled later.
> + * @flags: A mask of DMA_FENCE_FLAG_* defined below
> + * @timestamp: Timestamp when the fence was signaled.
> + * @error: Optional, only valid if < 0, must be set before calling
> + * dma_fence_signal, indicates that the fence has completed with an error.
> + *
> + * the flags member must be manipulated and read using the appropriate
> + * atomic ops (bit_*), so taking the spinlock will not be needed most
> + * of the time.
> + *
> + * DMA_FENCE_FLAG_SIGNALED_BIT - fence is already signaled
> + * DMA_FENCE_FLAG_TIMESTAMP_BIT - timestamp recorded for fence signaling
> + * DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT - enable_signaling might have been called
> + * DMA_FENCE_FLAG_USER_BITS - start of the unused bits, can be used by the
> + * implementer of the fence for its own purposes. Can be used in different
> + * ways by different fence implementers, so do not rely on this.
> + *
> + * Since atomic bitops are used, this is not guaranteed to be the case.
> + * Particularly, if the bit was set, but dma_fence_signal was called right
> + * before this bit was set, it would have been able to set the
> + * DMA_FENCE_FLAG_SIGNALED_BIT, before enable_signaling was called.
> + * Adding a check for DMA_FENCE_FLAG_SIGNALED_BIT after setting
> + * DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT closes this race, and makes sure that
> + * after dma_fence_signal was called, any enable_signaling call will have either
> + * been completed, or never called at all.
> + */
> +struct dma_fence {
> +       struct kref refcount;
> +       const struct dma_fence_ops *ops;
> +       struct rcu_head rcu;
> +       struct list_head cb_list;
> +       spinlock_t *lock;
> +       u64 context;
> +       u64 seqno;
> +       unsigned long flags;
> +       ktime_t timestamp;
> +       int error;
> +};
> +
> +enum dma_fence_flag_bits {
> +       DMA_FENCE_FLAG_SIGNALED_BIT,
> +       DMA_FENCE_FLAG_TIMESTAMP_BIT,
> +       DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
> +       DMA_FENCE_FLAG_USER_BITS, /* must always be last member */
> +};
> +
> +typedef void (*dma_fence_func_t)(struct dma_fence *fence,
> +                                struct dma_fence_cb *cb);
> +
> +/**
> + * struct dma_fence_cb - callback for dma_fence_add_callback()
> + * @node: used by dma_fence_add_callback() to append this struct to fence::cb_list
> + * @func: dma_fence_func_t to call
> + *
> + * This struct will be initialized by dma_fence_add_callback(), additional
> + * data can be passed along by embedding dma_fence_cb in another struct.
> + */
> +struct dma_fence_cb {
> +       struct list_head node;
> +       dma_fence_func_t func;
> +};
> +
> +/**
> + * struct dma_fence_ops - operations implemented for fence
> + *
> + */
> +struct dma_fence_ops {
> +       /**
> +        * @use_64bit_seqno:
> +        *
> +        * True if this dma_fence implementation uses 64bit seqno, false
> +        * otherwise.
> +        */
> +       bool use_64bit_seqno;
> +
> +       /**
> +        * @get_driver_name:
> +        *
> +        * Returns the driver name. This is a callback to allow drivers to
> +        * compute the name at runtime, without having it to store permanently
> +        * for each fence, or build a cache of some sort.
> +        *
> +        * This callback is mandatory.
> +        */
> +       const char * (*get_driver_name)(struct dma_fence *fence);
> +
> +       /**
> +        * @get_timeline_name:
> +        *
> +        * Return the name of the context this fence belongs to. This is a
> +        * callback to allow drivers to compute the name at runtime, without
> +        * having it to store permanently for each fence, or build a cache of
> +        * some sort.
> +        *
> +        * This callback is mandatory.
> +        */
> +       const char * (*get_timeline_name)(struct dma_fence *fence);
> +
> +       /**
> +        * @enable_signaling:
> +        *
> +        * Enable software signaling of fence.
> +        *
> +        * For fence implementations that have the capability for hw->hw
> +        * signaling, they can implement this op to enable the necessary
> +        * interrupts, or insert commands into cmdstream, etc, to avoid these
> +        * costly operations for the common case where only hw->hw
> +        * synchronization is required.  This is called in the first
> +        * dma_fence_wait() or dma_fence_add_callback() path to let the fence
> +        * implementation know that there is another driver waiting on the
> +        * signal (ie. hw->sw case).
> +        *
> +        * This function can be called from atomic context, but not
> +        * from irq context, so normal spinlocks can be used.
> +        *
> +        * A return value of false indicates the fence already passed,
> +        * or some failure occurred that made it impossible to enable
> +        * signaling. True indicates successful enabling.
> +        *
> +        * &dma_fence.error may be set in enable_signaling, but only when false
> +        * is returned.
> +        *
> +        * Since many implementations can call dma_fence_signal() even when before
> +        * @enable_signaling has been called there's a race window, where the
> +        * dma_fence_signal() might result in the final fence reference being
> +        * released and its memory freed. To avoid this, implementations of this
> +        * callback should grab their own reference using dma_fence_get(), to be
> +        * released when the fence is signalled (through e.g. the interrupt
> +        * handler).
> +        *
> +        * This callback is optional. If this callback is not present, then the
> +        * driver must always have signaling enabled.
> +        */
> +       bool (*enable_signaling)(struct dma_fence *fence);
> +
> +       /**
> +        * @signaled:
> +        *
> +        * Peek whether the fence is signaled, as a fastpath optimization for
> +        * e.g. dma_fence_wait() or dma_fence_add_callback(). Note that this
> +        * callback does not need to make any guarantees beyond that a fence
> +        * once indicates as signalled must always return true from this
> +        * callback. This callback may return false even if the fence has
> +        * completed already, in this case information hasn't propogated throug
> +        * the system yet. See also dma_fence_is_signaled().
> +        *
> +        * May set &dma_fence.error if returning true.
> +        *
> +        * This callback is optional.
> +        */
> +       bool (*signaled)(struct dma_fence *fence);
> +
> +       /**
> +        * @wait:
> +        *
> +        * Custom wait implementation, defaults to dma_fence_default_wait() if
> +        * not set.
> +        *
> +        * The dma_fence_default_wait implementation should work for any fence, as long
> +        * as @enable_signaling works correctly. This hook allows drivers to
> +        * have an optimized version for the case where a process context is
> +        * already available, e.g. if @enable_signaling for the general case
> +        * needs to set up a worker thread.
> +        *
> +        * Must return -ERESTARTSYS if the wait is intr = true and the wait was
> +        * interrupted, and remaining jiffies if fence has signaled, or 0 if wait
> +        * timed out. Can also return other error values on custom implementations,
> +        * which should be treated as if the fence is signaled. For example a hardware
> +        * lockup could be reported like that.
> +        *
> +        * This callback is optional.
> +        */
> +       signed long (*wait)(struct dma_fence *fence,
> +                           bool intr, signed long timeout);
> +
> +       /**
> +        * @release:
> +        *
> +        * Called on destruction of fence to release additional resources.
> +        * Can be called from irq context.  This callback is optional. If it is
> +        * NULL, then dma_fence_free() is instead called as the default
> +        * implementation.
> +        */
> +       void (*release)(struct dma_fence *fence);
> +
> +       /**
> +        * @fence_value_str:
> +        *
> +        * Callback to fill in free-form debug info specific to this fence, like
> +        * the sequence number.
> +        *
> +        * This callback is optional.
> +        */
> +       void (*fence_value_str)(struct dma_fence *fence, char *str, int size);
> +
> +       /**
> +        * @timeline_value_str:
> +        *
> +        * Fills in the current value of the timeline as a string, like the
> +        * sequence number. Note that the specific fence passed to this function
> +        * should not matter, drivers should only use it to look up the
> +        * corresponding timeline structures.
> +        */
> +       void (*timeline_value_str)(struct dma_fence *fence,
> +                                  char *str, int size);
> +};
> +
> +#endif /* __LINUX_DMA_FENCE_TYPES_H */
> diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
> index 974717d6ac0c..142eb67e695f 100644
> --- a/include/linux/dma-fence.h
> +++ b/include/linux/dma-fence.h
> @@ -21,6 +21,7 @@
>  #ifndef __LINUX_DMA_FENCE_H
>  #define __LINUX_DMA_FENCE_H
>
> +#include <linux/dma-fence-types.h>
>  #include <linux/err.h>
>  #include <linux/wait.h>
>  #include <linux/list.h>
> @@ -30,226 +31,7 @@
>  #include <linux/printk.h>
>  #include <linux/rcupdate.h>
>
> -struct dma_fence;
> -struct dma_fence_ops;
> -struct dma_fence_cb;
> -
> -/**
> - * struct dma_fence - software synchronization primitive
> - * @refcount: refcount for this fence
> - * @ops: dma_fence_ops associated with this fence
> - * @rcu: used for releasing fence with kfree_rcu
> - * @cb_list: list of all callbacks to call
> - * @lock: spin_lock_irqsave used for locking
> - * @context: execution context this fence belongs to, returned by
> - *           dma_fence_context_alloc()
> - * @seqno: the sequence number of this fence inside the execution context,
> - * can be compared to decide which fence would be signaled later.
> - * @flags: A mask of DMA_FENCE_FLAG_* defined below
> - * @timestamp: Timestamp when the fence was signaled.
> - * @error: Optional, only valid if < 0, must be set before calling
> - * dma_fence_signal, indicates that the fence has completed with an error.
> - *
> - * the flags member must be manipulated and read using the appropriate
> - * atomic ops (bit_*), so taking the spinlock will not be needed most
> - * of the time.
> - *
> - * DMA_FENCE_FLAG_SIGNALED_BIT - fence is already signaled
> - * DMA_FENCE_FLAG_TIMESTAMP_BIT - timestamp recorded for fence signaling
> - * DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT - enable_signaling might have been called
> - * DMA_FENCE_FLAG_USER_BITS - start of the unused bits, can be used by the
> - * implementer of the fence for its own purposes. Can be used in different
> - * ways by different fence implementers, so do not rely on this.
> - *
> - * Since atomic bitops are used, this is not guaranteed to be the case.
> - * Particularly, if the bit was set, but dma_fence_signal was called right
> - * before this bit was set, it would have been able to set the
> - * DMA_FENCE_FLAG_SIGNALED_BIT, before enable_signaling was called.
> - * Adding a check for DMA_FENCE_FLAG_SIGNALED_BIT after setting
> - * DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT closes this race, and makes sure that
> - * after dma_fence_signal was called, any enable_signaling call will have either
> - * been completed, or never called at all.
> - */
> -struct dma_fence {
> -       struct kref refcount;
> -       const struct dma_fence_ops *ops;
> -       struct rcu_head rcu;
> -       struct list_head cb_list;
> -       spinlock_t *lock;
> -       u64 context;
> -       u64 seqno;
> -       unsigned long flags;
> -       ktime_t timestamp;
> -       int error;
> -};
> -
> -enum dma_fence_flag_bits {
> -       DMA_FENCE_FLAG_SIGNALED_BIT,
> -       DMA_FENCE_FLAG_TIMESTAMP_BIT,
> -       DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
> -       DMA_FENCE_FLAG_USER_BITS, /* must always be last member */
> -};
> -
> -typedef void (*dma_fence_func_t)(struct dma_fence *fence,
> -                                struct dma_fence_cb *cb);
> -
> -/**
> - * struct dma_fence_cb - callback for dma_fence_add_callback()
> - * @node: used by dma_fence_add_callback() to append this struct to fence::cb_list
> - * @func: dma_fence_func_t to call
> - *
> - * This struct will be initialized by dma_fence_add_callback(), additional
> - * data can be passed along by embedding dma_fence_cb in another struct.
> - */
> -struct dma_fence_cb {
> -       struct list_head node;
> -       dma_fence_func_t func;
> -};
> -
> -/**
> - * struct dma_fence_ops - operations implemented for fence
> - *
> - */
> -struct dma_fence_ops {
> -       /**
> -        * @use_64bit_seqno:
> -        *
> -        * True if this dma_fence implementation uses 64bit seqno, false
> -        * otherwise.
> -        */
> -       bool use_64bit_seqno;
> -
> -       /**
> -        * @get_driver_name:
> -        *
> -        * Returns the driver name. This is a callback to allow drivers to
> -        * compute the name at runtime, without having it to store permanently
> -        * for each fence, or build a cache of some sort.
> -        *
> -        * This callback is mandatory.
> -        */
> -       const char * (*get_driver_name)(struct dma_fence *fence);
> -
> -       /**
> -        * @get_timeline_name:
> -        *
> -        * Return the name of the context this fence belongs to. This is a
> -        * callback to allow drivers to compute the name at runtime, without
> -        * having it to store permanently for each fence, or build a cache of
> -        * some sort.
> -        *
> -        * This callback is mandatory.
> -        */
> -       const char * (*get_timeline_name)(struct dma_fence *fence);
> -
> -       /**
> -        * @enable_signaling:
> -        *
> -        * Enable software signaling of fence.
> -        *
> -        * For fence implementations that have the capability for hw->hw
> -        * signaling, they can implement this op to enable the necessary
> -        * interrupts, or insert commands into cmdstream, etc, to avoid these
> -        * costly operations for the common case where only hw->hw
> -        * synchronization is required.  This is called in the first
> -        * dma_fence_wait() or dma_fence_add_callback() path to let the fence
> -        * implementation know that there is another driver waiting on the
> -        * signal (ie. hw->sw case).
> -        *
> -        * This function can be called from atomic context, but not
> -        * from irq context, so normal spinlocks can be used.
> -        *
> -        * A return value of false indicates the fence already passed,
> -        * or some failure occurred that made it impossible to enable
> -        * signaling. True indicates successful enabling.
> -        *
> -        * &dma_fence.error may be set in enable_signaling, but only when false
> -        * is returned.
> -        *
> -        * Since many implementations can call dma_fence_signal() even when before
> -        * @enable_signaling has been called there's a race window, where the
> -        * dma_fence_signal() might result in the final fence reference being
> -        * released and its memory freed. To avoid this, implementations of this
> -        * callback should grab their own reference using dma_fence_get(), to be
> -        * released when the fence is signalled (through e.g. the interrupt
> -        * handler).
> -        *
> -        * This callback is optional. If this callback is not present, then the
> -        * driver must always have signaling enabled.
> -        */
> -       bool (*enable_signaling)(struct dma_fence *fence);
> -
> -       /**
> -        * @signaled:
> -        *
> -        * Peek whether the fence is signaled, as a fastpath optimization for
> -        * e.g. dma_fence_wait() or dma_fence_add_callback(). Note that this
> -        * callback does not need to make any guarantees beyond that a fence
> -        * once indicates as signalled must always return true from this
> -        * callback. This callback may return false even if the fence has
> -        * completed already, in this case information hasn't propogated throug
> -        * the system yet. See also dma_fence_is_signaled().
> -        *
> -        * May set &dma_fence.error if returning true.
> -        *
> -        * This callback is optional.
> -        */
> -       bool (*signaled)(struct dma_fence *fence);
> -
> -       /**
> -        * @wait:
> -        *
> -        * Custom wait implementation, defaults to dma_fence_default_wait() if
> -        * not set.
> -        *
> -        * The dma_fence_default_wait implementation should work for any fence, as long
> -        * as @enable_signaling works correctly. This hook allows drivers to
> -        * have an optimized version for the case where a process context is
> -        * already available, e.g. if @enable_signaling for the general case
> -        * needs to set up a worker thread.
> -        *
> -        * Must return -ERESTARTSYS if the wait is intr = true and the wait was
> -        * interrupted, and remaining jiffies if fence has signaled, or 0 if wait
> -        * timed out. Can also return other error values on custom implementations,
> -        * which should be treated as if the fence is signaled. For example a hardware
> -        * lockup could be reported like that.
> -        *
> -        * This callback is optional.
> -        */
> -       signed long (*wait)(struct dma_fence *fence,
> -                           bool intr, signed long timeout);
> -
> -       /**
> -        * @release:
> -        *
> -        * Called on destruction of fence to release additional resources.
> -        * Can be called from irq context.  This callback is optional. If it is
> -        * NULL, then dma_fence_free() is instead called as the default
> -        * implementation.
> -        */
> -       void (*release)(struct dma_fence *fence);
> -
> -       /**
> -        * @fence_value_str:
> -        *
> -        * Callback to fill in free-form debug info specific to this fence, like
> -        * the sequence number.
> -        *
> -        * This callback is optional.
> -        */
> -       void (*fence_value_str)(struct dma_fence *fence, char *str, int size);
> -
> -       /**
> -        * @timeline_value_str:
> -        *
> -        * Fills in the current value of the timeline as a string, like the
> -        * sequence number. Note that the specific fence passed to this function
> -        * should not matter, drivers should only use it to look up the
> -        * corresponding timeline structures.
> -        */
> -       void (*timeline_value_str)(struct dma_fence *fence,
> -                                  char *str, int size);
> -};
> +#include <trace/events/dma_fence.h>
>
>  void dma_fence_init(struct dma_fence *fence, const struct dma_fence_ops *ops,
>                     spinlock_t *lock, u64 context, u64 seqno);
> @@ -561,6 +343,35 @@ static inline signed long dma_fence_wait(struct dma_fence *fence, bool intr)
>  struct dma_fence *dma_fence_get_stub(void);
>  u64 dma_fence_context_alloc(unsigned num);
>
> +static inline bool
> +__dma_fence_signal(struct dma_fence *fence)
> +{
> +       return !test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags);
> +}
> +
> +static inline void
> +__dma_fence_signal__timestamp(struct dma_fence *fence, ktime_t timestamp)
> +{
> +       fence->timestamp = timestamp;
> +       set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags);
> +       trace_dma_fence_signaled(fence);
> +}
> +
> +static inline void
> +__dma_fence_signal__notify(struct dma_fence *fence)
> +{
> +       struct dma_fence_cb *cur, *tmp;
> +
> +       lockdep_assert_held(fence->lock);
> +       lockdep_assert_irqs_disabled();
> +
> +       list_for_each_entry_safe(cur, tmp, &fence->cb_list, node) {
> +               INIT_LIST_HEAD(&cur->node);
> +               cur->func(fence, cur);
> +       }
> +       INIT_LIST_HEAD(&fence->cb_list);
> +}
> +
>  #define DMA_FENCE_TRACE(f, fmt, args...) \
>         do {                                                            \
>                 struct dma_fence *__ff = (f);                           \
> --
> 2.20.1
>
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel
diff mbox series

Patch

diff --git a/drivers/dma-buf/Makefile b/drivers/dma-buf/Makefile
index 1f006e083eb9..56e579878f26 100644
--- a/drivers/dma-buf/Makefile
+++ b/drivers/dma-buf/Makefile
@@ -1,5 +1,11 @@ 
-obj-y := dma-buf.o dma-fence.o dma-fence-array.o dma-fence-chain.o \
-	 reservation.o seqno-fence.o
+obj-y := \
+	dma-buf.o \
+	dma-fence.o \
+	dma-fence-array.o \
+	dma-fence-chain.o \
+	dma-fence-trace.o \
+	reservation.o \
+	seqno-fence.o
 obj-$(CONFIG_SYNC_FILE)		+= sync_file.o
 obj-$(CONFIG_SW_SYNC)		+= sw_sync.o sync_debug.o
 obj-$(CONFIG_UDMABUF)		+= udmabuf.o
diff --git a/drivers/dma-buf/dma-fence-trace.c b/drivers/dma-buf/dma-fence-trace.c
new file mode 100644
index 000000000000..eb6f282be4c0
--- /dev/null
+++ b/drivers/dma-buf/dma-fence-trace.c
@@ -0,0 +1,28 @@ 
+/*
+ * Fence mechanism for dma-buf and to allow for asynchronous dma access
+ *
+ * Copyright (C) 2012 Canonical Ltd
+ * Copyright (C) 2012 Texas Instruments
+ *
+ * Authors:
+ * Rob Clark <robdclark@gmail.com>
+ * Maarten Lankhorst <maarten.lankhorst@canonical.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/dma-fence-types.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/dma_fence.h>
+
+EXPORT_TRACEPOINT_SYMBOL(dma_fence_emit);
+EXPORT_TRACEPOINT_SYMBOL(dma_fence_enable_signal);
+EXPORT_TRACEPOINT_SYMBOL(dma_fence_signaled);
diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
index 9bf06042619a..8196a179fdc2 100644
--- a/drivers/dma-buf/dma-fence.c
+++ b/drivers/dma-buf/dma-fence.c
@@ -24,13 +24,6 @@ 
 #include <linux/dma-fence.h>
 #include <linux/sched/signal.h>
 
-#define CREATE_TRACE_POINTS
-#include <trace/events/dma_fence.h>
-
-EXPORT_TRACEPOINT_SYMBOL(dma_fence_emit);
-EXPORT_TRACEPOINT_SYMBOL(dma_fence_enable_signal);
-EXPORT_TRACEPOINT_SYMBOL(dma_fence_signaled);
-
 static DEFINE_SPINLOCK(dma_fence_stub_lock);
 static struct dma_fence dma_fence_stub;
 
@@ -136,7 +129,6 @@  EXPORT_SYMBOL(dma_fence_context_alloc);
  */
 int dma_fence_signal_locked(struct dma_fence *fence)
 {
-	struct dma_fence_cb *cur, *tmp;
 	int ret = 0;
 
 	lockdep_assert_held(fence->lock);
@@ -144,7 +136,7 @@  int dma_fence_signal_locked(struct dma_fence *fence)
 	if (WARN_ON(!fence))
 		return -EINVAL;
 
-	if (test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) {
+	if (!__dma_fence_signal(fence)) {
 		ret = -EINVAL;
 
 		/*
@@ -152,15 +144,10 @@  int dma_fence_signal_locked(struct dma_fence *fence)
 		 * still run through all callbacks
 		 */
 	} else {
-		fence->timestamp = ktime_get();
-		set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags);
-		trace_dma_fence_signaled(fence);
+		__dma_fence_signal__timestamp(fence, ktime_get());
 	}
 
-	list_for_each_entry_safe(cur, tmp, &fence->cb_list, node) {
-		list_del_init(&cur->node);
-		cur->func(fence, cur);
-	}
+	__dma_fence_signal__notify(fence);
 	return ret;
 }
 EXPORT_SYMBOL(dma_fence_signal_locked);
@@ -185,21 +172,14 @@  int dma_fence_signal(struct dma_fence *fence)
 	if (!fence)
 		return -EINVAL;
 
-	if (test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
+	if (!__dma_fence_signal(fence))
 		return -EINVAL;
 
-	fence->timestamp = ktime_get();
-	set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags);
-	trace_dma_fence_signaled(fence);
+	__dma_fence_signal__timestamp(fence, ktime_get());
 
 	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &fence->flags)) {
-		struct dma_fence_cb *cur, *tmp;
-
 		spin_lock_irqsave(fence->lock, flags);
-		list_for_each_entry_safe(cur, tmp, &fence->cb_list, node) {
-			list_del_init(&cur->node);
-			cur->func(fence, cur);
-		}
+		__dma_fence_signal__notify(fence);
 		spin_unlock_irqrestore(fence->lock, flags);
 	}
 	return 0;
diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
index c092bdf5f0bf..d1f8572100c3 100644
--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
@@ -23,7 +23,6 @@ 
  */
 
 #include <linux/kthread.h>
-#include <trace/events/dma_fence.h>
 #include <uapi/linux/sched/types.h>
 
 #include "i915_drv.h"
@@ -97,35 +96,6 @@  check_signal_order(struct intel_context *ce, struct i915_request *rq)
 	return true;
 }
 
-static bool
-__dma_fence_signal(struct dma_fence *fence)
-{
-	return !test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags);
-}
-
-static void
-__dma_fence_signal__timestamp(struct dma_fence *fence, ktime_t timestamp)
-{
-	fence->timestamp = timestamp;
-	set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags);
-	trace_dma_fence_signaled(fence);
-}
-
-static void
-__dma_fence_signal__notify(struct dma_fence *fence)
-{
-	struct dma_fence_cb *cur, *tmp;
-
-	lockdep_assert_held(fence->lock);
-	lockdep_assert_irqs_disabled();
-
-	list_for_each_entry_safe(cur, tmp, &fence->cb_list, node) {
-		INIT_LIST_HEAD(&cur->node);
-		cur->func(fence, cur);
-	}
-	INIT_LIST_HEAD(&fence->cb_list);
-}
-
 void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
 {
 	struct intel_breadcrumbs *b = &engine->breadcrumbs;
diff --git a/include/linux/dma-fence-types.h b/include/linux/dma-fence-types.h
new file mode 100644
index 000000000000..18e7511c0eed
--- /dev/null
+++ b/include/linux/dma-fence-types.h
@@ -0,0 +1,248 @@ 
+/*
+ * Fence mechanism for dma-buf to allow for asynchronous dma access
+ *
+ * Copyright (C) 2012 Canonical Ltd
+ * Copyright (C) 2012 Texas Instruments
+ *
+ * Authors:
+ * Rob Clark <robdclark@gmail.com>
+ * Maarten Lankhorst <maarten.lankhorst@canonical.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef __LINUX_DMA_FENCE_TYPES_H
+#define __LINUX_DMA_FENCE_TYPES_H
+
+#include <linux/kref.h>
+#include <linux/ktime.h>
+
+struct dma_fence;
+struct dma_fence_ops;
+struct dma_fence_cb;
+
+/**
+ * struct dma_fence - software synchronization primitive
+ * @refcount: refcount for this fence
+ * @ops: dma_fence_ops associated with this fence
+ * @rcu: used for releasing fence with kfree_rcu
+ * @cb_list: list of all callbacks to call
+ * @lock: spin_lock_irqsave used for locking
+ * @context: execution context this fence belongs to, returned by
+ *           dma_fence_context_alloc()
+ * @seqno: the sequence number of this fence inside the execution context,
+ * can be compared to decide which fence would be signaled later.
+ * @flags: A mask of DMA_FENCE_FLAG_* defined below
+ * @timestamp: Timestamp when the fence was signaled.
+ * @error: Optional, only valid if < 0, must be set before calling
+ * dma_fence_signal, indicates that the fence has completed with an error.
+ *
+ * the flags member must be manipulated and read using the appropriate
+ * atomic ops (bit_*), so taking the spinlock will not be needed most
+ * of the time.
+ *
+ * DMA_FENCE_FLAG_SIGNALED_BIT - fence is already signaled
+ * DMA_FENCE_FLAG_TIMESTAMP_BIT - timestamp recorded for fence signaling
+ * DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT - enable_signaling might have been called
+ * DMA_FENCE_FLAG_USER_BITS - start of the unused bits, can be used by the
+ * implementer of the fence for its own purposes. Can be used in different
+ * ways by different fence implementers, so do not rely on this.
+ *
+ * Since atomic bitops are used, this is not guaranteed to be the case.
+ * Particularly, if the bit was set, but dma_fence_signal was called right
+ * before this bit was set, it would have been able to set the
+ * DMA_FENCE_FLAG_SIGNALED_BIT, before enable_signaling was called.
+ * Adding a check for DMA_FENCE_FLAG_SIGNALED_BIT after setting
+ * DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT closes this race, and makes sure that
+ * after dma_fence_signal was called, any enable_signaling call will have either
+ * been completed, or never called at all.
+ */
+struct dma_fence {
+	struct kref refcount;
+	const struct dma_fence_ops *ops;
+	struct rcu_head rcu;
+	struct list_head cb_list;
+	spinlock_t *lock;
+	u64 context;
+	u64 seqno;
+	unsigned long flags;
+	ktime_t timestamp;
+	int error;
+};
+
+enum dma_fence_flag_bits {
+	DMA_FENCE_FLAG_SIGNALED_BIT,
+	DMA_FENCE_FLAG_TIMESTAMP_BIT,
+	DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
+	DMA_FENCE_FLAG_USER_BITS, /* must always be last member */
+};
+
+typedef void (*dma_fence_func_t)(struct dma_fence *fence,
+				 struct dma_fence_cb *cb);
+
+/**
+ * struct dma_fence_cb - callback for dma_fence_add_callback()
+ * @node: used by dma_fence_add_callback() to append this struct to fence::cb_list
+ * @func: dma_fence_func_t to call
+ *
+ * This struct will be initialized by dma_fence_add_callback(), additional
+ * data can be passed along by embedding dma_fence_cb in another struct.
+ */
+struct dma_fence_cb {
+	struct list_head node;
+	dma_fence_func_t func;
+};
+
+/**
+ * struct dma_fence_ops - operations implemented for fence
+ *
+ */
+struct dma_fence_ops {
+	/**
+	 * @use_64bit_seqno:
+	 *
+	 * True if this dma_fence implementation uses 64bit seqno, false
+	 * otherwise.
+	 */
+	bool use_64bit_seqno;
+
+	/**
+	 * @get_driver_name:
+	 *
+	 * Returns the driver name. This is a callback to allow drivers to
+	 * compute the name at runtime, without having it to store permanently
+	 * for each fence, or build a cache of some sort.
+	 *
+	 * This callback is mandatory.
+	 */
+	const char * (*get_driver_name)(struct dma_fence *fence);
+
+	/**
+	 * @get_timeline_name:
+	 *
+	 * Return the name of the context this fence belongs to. This is a
+	 * callback to allow drivers to compute the name at runtime, without
+	 * having it to store permanently for each fence, or build a cache of
+	 * some sort.
+	 *
+	 * This callback is mandatory.
+	 */
+	const char * (*get_timeline_name)(struct dma_fence *fence);
+
+	/**
+	 * @enable_signaling:
+	 *
+	 * Enable software signaling of fence.
+	 *
+	 * For fence implementations that have the capability for hw->hw
+	 * signaling, they can implement this op to enable the necessary
+	 * interrupts, or insert commands into cmdstream, etc, to avoid these
+	 * costly operations for the common case where only hw->hw
+	 * synchronization is required.  This is called in the first
+	 * dma_fence_wait() or dma_fence_add_callback() path to let the fence
+	 * implementation know that there is another driver waiting on the
+	 * signal (ie. hw->sw case).
+	 *
+	 * This function can be called from atomic context, but not
+	 * from irq context, so normal spinlocks can be used.
+	 *
+	 * A return value of false indicates the fence already passed,
+	 * or some failure occurred that made it impossible to enable
+	 * signaling. True indicates successful enabling.
+	 *
+	 * &dma_fence.error may be set in enable_signaling, but only when false
+	 * is returned.
+	 *
+	 * Since many implementations can call dma_fence_signal() even when before
+	 * @enable_signaling has been called there's a race window, where the
+	 * dma_fence_signal() might result in the final fence reference being
+	 * released and its memory freed. To avoid this, implementations of this
+	 * callback should grab their own reference using dma_fence_get(), to be
+	 * released when the fence is signalled (through e.g. the interrupt
+	 * handler).
+	 *
+	 * This callback is optional. If this callback is not present, then the
+	 * driver must always have signaling enabled.
+	 */
+	bool (*enable_signaling)(struct dma_fence *fence);
+
+	/**
+	 * @signaled:
+	 *
+	 * Peek whether the fence is signaled, as a fastpath optimization for
+	 * e.g. dma_fence_wait() or dma_fence_add_callback(). Note that this
+	 * callback does not need to make any guarantees beyond that a fence
+	 * once indicates as signalled must always return true from this
+	 * callback. This callback may return false even if the fence has
+	 * completed already, in this case information hasn't propogated throug
+	 * the system yet. See also dma_fence_is_signaled().
+	 *
+	 * May set &dma_fence.error if returning true.
+	 *
+	 * This callback is optional.
+	 */
+	bool (*signaled)(struct dma_fence *fence);
+
+	/**
+	 * @wait:
+	 *
+	 * Custom wait implementation, defaults to dma_fence_default_wait() if
+	 * not set.
+	 *
+	 * The dma_fence_default_wait implementation should work for any fence, as long
+	 * as @enable_signaling works correctly. This hook allows drivers to
+	 * have an optimized version for the case where a process context is
+	 * already available, e.g. if @enable_signaling for the general case
+	 * needs to set up a worker thread.
+	 *
+	 * Must return -ERESTARTSYS if the wait is intr = true and the wait was
+	 * interrupted, and remaining jiffies if fence has signaled, or 0 if wait
+	 * timed out. Can also return other error values on custom implementations,
+	 * which should be treated as if the fence is signaled. For example a hardware
+	 * lockup could be reported like that.
+	 *
+	 * This callback is optional.
+	 */
+	signed long (*wait)(struct dma_fence *fence,
+			    bool intr, signed long timeout);
+
+	/**
+	 * @release:
+	 *
+	 * Called on destruction of fence to release additional resources.
+	 * Can be called from irq context.  This callback is optional. If it is
+	 * NULL, then dma_fence_free() is instead called as the default
+	 * implementation.
+	 */
+	void (*release)(struct dma_fence *fence);
+
+	/**
+	 * @fence_value_str:
+	 *
+	 * Callback to fill in free-form debug info specific to this fence, like
+	 * the sequence number.
+	 *
+	 * This callback is optional.
+	 */
+	void (*fence_value_str)(struct dma_fence *fence, char *str, int size);
+
+	/**
+	 * @timeline_value_str:
+	 *
+	 * Fills in the current value of the timeline as a string, like the
+	 * sequence number. Note that the specific fence passed to this function
+	 * should not matter, drivers should only use it to look up the
+	 * corresponding timeline structures.
+	 */
+	void (*timeline_value_str)(struct dma_fence *fence,
+				   char *str, int size);
+};
+
+#endif /* __LINUX_DMA_FENCE_TYPES_H */
diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
index 974717d6ac0c..142eb67e695f 100644
--- a/include/linux/dma-fence.h
+++ b/include/linux/dma-fence.h
@@ -21,6 +21,7 @@ 
 #ifndef __LINUX_DMA_FENCE_H
 #define __LINUX_DMA_FENCE_H
 
+#include <linux/dma-fence-types.h>
 #include <linux/err.h>
 #include <linux/wait.h>
 #include <linux/list.h>
@@ -30,226 +31,7 @@ 
 #include <linux/printk.h>
 #include <linux/rcupdate.h>
 
-struct dma_fence;
-struct dma_fence_ops;
-struct dma_fence_cb;
-
-/**
- * struct dma_fence - software synchronization primitive
- * @refcount: refcount for this fence
- * @ops: dma_fence_ops associated with this fence
- * @rcu: used for releasing fence with kfree_rcu
- * @cb_list: list of all callbacks to call
- * @lock: spin_lock_irqsave used for locking
- * @context: execution context this fence belongs to, returned by
- *           dma_fence_context_alloc()
- * @seqno: the sequence number of this fence inside the execution context,
- * can be compared to decide which fence would be signaled later.
- * @flags: A mask of DMA_FENCE_FLAG_* defined below
- * @timestamp: Timestamp when the fence was signaled.
- * @error: Optional, only valid if < 0, must be set before calling
- * dma_fence_signal, indicates that the fence has completed with an error.
- *
- * the flags member must be manipulated and read using the appropriate
- * atomic ops (bit_*), so taking the spinlock will not be needed most
- * of the time.
- *
- * DMA_FENCE_FLAG_SIGNALED_BIT - fence is already signaled
- * DMA_FENCE_FLAG_TIMESTAMP_BIT - timestamp recorded for fence signaling
- * DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT - enable_signaling might have been called
- * DMA_FENCE_FLAG_USER_BITS - start of the unused bits, can be used by the
- * implementer of the fence for its own purposes. Can be used in different
- * ways by different fence implementers, so do not rely on this.
- *
- * Since atomic bitops are used, this is not guaranteed to be the case.
- * Particularly, if the bit was set, but dma_fence_signal was called right
- * before this bit was set, it would have been able to set the
- * DMA_FENCE_FLAG_SIGNALED_BIT, before enable_signaling was called.
- * Adding a check for DMA_FENCE_FLAG_SIGNALED_BIT after setting
- * DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT closes this race, and makes sure that
- * after dma_fence_signal was called, any enable_signaling call will have either
- * been completed, or never called at all.
- */
-struct dma_fence {
-	struct kref refcount;
-	const struct dma_fence_ops *ops;
-	struct rcu_head rcu;
-	struct list_head cb_list;
-	spinlock_t *lock;
-	u64 context;
-	u64 seqno;
-	unsigned long flags;
-	ktime_t timestamp;
-	int error;
-};
-
-enum dma_fence_flag_bits {
-	DMA_FENCE_FLAG_SIGNALED_BIT,
-	DMA_FENCE_FLAG_TIMESTAMP_BIT,
-	DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
-	DMA_FENCE_FLAG_USER_BITS, /* must always be last member */
-};
-
-typedef void (*dma_fence_func_t)(struct dma_fence *fence,
-				 struct dma_fence_cb *cb);
-
-/**
- * struct dma_fence_cb - callback for dma_fence_add_callback()
- * @node: used by dma_fence_add_callback() to append this struct to fence::cb_list
- * @func: dma_fence_func_t to call
- *
- * This struct will be initialized by dma_fence_add_callback(), additional
- * data can be passed along by embedding dma_fence_cb in another struct.
- */
-struct dma_fence_cb {
-	struct list_head node;
-	dma_fence_func_t func;
-};
-
-/**
- * struct dma_fence_ops - operations implemented for fence
- *
- */
-struct dma_fence_ops {
-	/**
-	 * @use_64bit_seqno:
-	 *
-	 * True if this dma_fence implementation uses 64bit seqno, false
-	 * otherwise.
-	 */
-	bool use_64bit_seqno;
-
-	/**
-	 * @get_driver_name:
-	 *
-	 * Returns the driver name. This is a callback to allow drivers to
-	 * compute the name at runtime, without having it to store permanently
-	 * for each fence, or build a cache of some sort.
-	 *
-	 * This callback is mandatory.
-	 */
-	const char * (*get_driver_name)(struct dma_fence *fence);
-
-	/**
-	 * @get_timeline_name:
-	 *
-	 * Return the name of the context this fence belongs to. This is a
-	 * callback to allow drivers to compute the name at runtime, without
-	 * having it to store permanently for each fence, or build a cache of
-	 * some sort.
-	 *
-	 * This callback is mandatory.
-	 */
-	const char * (*get_timeline_name)(struct dma_fence *fence);
-
-	/**
-	 * @enable_signaling:
-	 *
-	 * Enable software signaling of fence.
-	 *
-	 * For fence implementations that have the capability for hw->hw
-	 * signaling, they can implement this op to enable the necessary
-	 * interrupts, or insert commands into cmdstream, etc, to avoid these
-	 * costly operations for the common case where only hw->hw
-	 * synchronization is required.  This is called in the first
-	 * dma_fence_wait() or dma_fence_add_callback() path to let the fence
-	 * implementation know that there is another driver waiting on the
-	 * signal (ie. hw->sw case).
-	 *
-	 * This function can be called from atomic context, but not
-	 * from irq context, so normal spinlocks can be used.
-	 *
-	 * A return value of false indicates the fence already passed,
-	 * or some failure occurred that made it impossible to enable
-	 * signaling. True indicates successful enabling.
-	 *
-	 * &dma_fence.error may be set in enable_signaling, but only when false
-	 * is returned.
-	 *
-	 * Since many implementations can call dma_fence_signal() even when before
-	 * @enable_signaling has been called there's a race window, where the
-	 * dma_fence_signal() might result in the final fence reference being
-	 * released and its memory freed. To avoid this, implementations of this
-	 * callback should grab their own reference using dma_fence_get(), to be
-	 * released when the fence is signalled (through e.g. the interrupt
-	 * handler).
-	 *
-	 * This callback is optional. If this callback is not present, then the
-	 * driver must always have signaling enabled.
-	 */
-	bool (*enable_signaling)(struct dma_fence *fence);
-
-	/**
-	 * @signaled:
-	 *
-	 * Peek whether the fence is signaled, as a fastpath optimization for
-	 * e.g. dma_fence_wait() or dma_fence_add_callback(). Note that this
-	 * callback does not need to make any guarantees beyond that a fence
-	 * once indicates as signalled must always return true from this
-	 * callback. This callback may return false even if the fence has
-	 * completed already, in this case information hasn't propogated throug
-	 * the system yet. See also dma_fence_is_signaled().
-	 *
-	 * May set &dma_fence.error if returning true.
-	 *
-	 * This callback is optional.
-	 */
-	bool (*signaled)(struct dma_fence *fence);
-
-	/**
-	 * @wait:
-	 *
-	 * Custom wait implementation, defaults to dma_fence_default_wait() if
-	 * not set.
-	 *
-	 * The dma_fence_default_wait implementation should work for any fence, as long
-	 * as @enable_signaling works correctly. This hook allows drivers to
-	 * have an optimized version for the case where a process context is
-	 * already available, e.g. if @enable_signaling for the general case
-	 * needs to set up a worker thread.
-	 *
-	 * Must return -ERESTARTSYS if the wait is intr = true and the wait was
-	 * interrupted, and remaining jiffies if fence has signaled, or 0 if wait
-	 * timed out. Can also return other error values on custom implementations,
-	 * which should be treated as if the fence is signaled. For example a hardware
-	 * lockup could be reported like that.
-	 *
-	 * This callback is optional.
-	 */
-	signed long (*wait)(struct dma_fence *fence,
-			    bool intr, signed long timeout);
-
-	/**
-	 * @release:
-	 *
-	 * Called on destruction of fence to release additional resources.
-	 * Can be called from irq context.  This callback is optional. If it is
-	 * NULL, then dma_fence_free() is instead called as the default
-	 * implementation.
-	 */
-	void (*release)(struct dma_fence *fence);
-
-	/**
-	 * @fence_value_str:
-	 *
-	 * Callback to fill in free-form debug info specific to this fence, like
-	 * the sequence number.
-	 *
-	 * This callback is optional.
-	 */
-	void (*fence_value_str)(struct dma_fence *fence, char *str, int size);
-
-	/**
-	 * @timeline_value_str:
-	 *
-	 * Fills in the current value of the timeline as a string, like the
-	 * sequence number. Note that the specific fence passed to this function
-	 * should not matter, drivers should only use it to look up the
-	 * corresponding timeline structures.
-	 */
-	void (*timeline_value_str)(struct dma_fence *fence,
-				   char *str, int size);
-};
+#include <trace/events/dma_fence.h>
 
 void dma_fence_init(struct dma_fence *fence, const struct dma_fence_ops *ops,
 		    spinlock_t *lock, u64 context, u64 seqno);
@@ -561,6 +343,35 @@  static inline signed long dma_fence_wait(struct dma_fence *fence, bool intr)
 struct dma_fence *dma_fence_get_stub(void);
 u64 dma_fence_context_alloc(unsigned num);
 
+static inline bool
+__dma_fence_signal(struct dma_fence *fence)
+{
+	return !test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags);
+}
+
+static inline void
+__dma_fence_signal__timestamp(struct dma_fence *fence, ktime_t timestamp)
+{
+	fence->timestamp = timestamp;
+	set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags);
+	trace_dma_fence_signaled(fence);
+}
+
+static inline void
+__dma_fence_signal__notify(struct dma_fence *fence)
+{
+	struct dma_fence_cb *cur, *tmp;
+
+	lockdep_assert_held(fence->lock);
+	lockdep_assert_irqs_disabled();
+
+	list_for_each_entry_safe(cur, tmp, &fence->cb_list, node) {
+		INIT_LIST_HEAD(&cur->node);
+		cur->func(fence, cur);
+	}
+	INIT_LIST_HEAD(&fence->cb_list);
+}
+
 #define DMA_FENCE_TRACE(f, fmt, args...) \
 	do {								\
 		struct dma_fence *__ff = (f);				\