Message ID | 20190909093116.7747-7-lionel.g.landwerlin@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | drm/i915: Vulkan performance query support | expand |
Looks good. The struct perf inside drm_i915_private could also move to i915_perf_types.h. Irrespective of that, Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com> Regards, Umesh On Mon, Sep 09, 2019 at 12:31:09PM +0300, Lionel Landwerlin wrote: >Following a pattern used throughout the driver. > >Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> >--- > drivers/gpu/drm/i915/i915_drv.h | 300 +---------------------- > drivers/gpu/drm/i915/i915_perf.h | 2 + > drivers/gpu/drm/i915/i915_perf_types.h | 327 +++++++++++++++++++++++++ > 3 files changed, 330 insertions(+), 299 deletions(-) > create mode 100644 drivers/gpu/drm/i915/i915_perf_types.h > >diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h >index 274a1193d4f0..f4145ae6ab6e 100644 >--- a/drivers/gpu/drm/i915/i915_drv.h >+++ b/drivers/gpu/drm/i915/i915_drv.h >@@ -92,6 +92,7 @@ > #include "i915_gem_fence_reg.h" > #include "i915_gem_gtt.h" > #include "i915_gpu_error.h" >+#include "i915_perf_types.h" > #include "i915_request.h" > #include "i915_scheduler.h" > #include "gt/intel_timeline.h" >@@ -979,305 +980,6 @@ struct intel_wm_config { > bool sprites_scaled; > }; > >-struct i915_oa_format { >- u32 format; >- int size; >-}; >- >-struct i915_oa_reg { >- i915_reg_t addr; >- u32 value; >-}; >- >-struct i915_oa_config { >- char uuid[UUID_STRING_LEN + 1]; >- int id; >- >- const struct i915_oa_reg *mux_regs; >- u32 mux_regs_len; >- const struct i915_oa_reg *b_counter_regs; >- u32 b_counter_regs_len; >- const struct i915_oa_reg *flex_regs; >- u32 flex_regs_len; >- >- struct attribute_group sysfs_metric; >- struct attribute *attrs[2]; >- struct device_attribute sysfs_metric_id; >- >- atomic_t ref_count; >-}; >- >-struct i915_perf_stream; >- >-/** >- * struct i915_perf_stream_ops - the OPs to support a specific stream type >- */ >-struct i915_perf_stream_ops { >- /** >- * @enable: Enables the collection of HW samples, either in response to >- * `I915_PERF_IOCTL_ENABLE` 
or implicitly called when stream is opened >- * without `I915_PERF_FLAG_DISABLED`. >- */ >- void (*enable)(struct i915_perf_stream *stream); >- >- /** >- * @disable: Disables the collection of HW samples, either in response >- * to `I915_PERF_IOCTL_DISABLE` or implicitly called before destroying >- * the stream. >- */ >- void (*disable)(struct i915_perf_stream *stream); >- >- /** >- * @poll_wait: Call poll_wait, passing a wait queue that will be woken >- * once there is something ready to read() for the stream >- */ >- void (*poll_wait)(struct i915_perf_stream *stream, >- struct file *file, >- poll_table *wait); >- >- /** >- * @wait_unlocked: For handling a blocking read, wait until there is >- * something to ready to read() for the stream. E.g. wait on the same >- * wait queue that would be passed to poll_wait(). >- */ >- int (*wait_unlocked)(struct i915_perf_stream *stream); >- >- /** >- * @read: Copy buffered metrics as records to userspace >- * **buf**: the userspace, destination buffer >- * **count**: the number of bytes to copy, requested by userspace >- * **offset**: zero at the start of the read, updated as the read >- * proceeds, it represents how many bytes have been copied so far and >- * the buffer offset for copying the next record. >- * >- * Copy as many buffered i915 perf samples and records for this stream >- * to userspace as will fit in the given buffer. >- * >- * Only write complete records; returning -%ENOSPC if there isn't room >- * for a complete record. >- * >- * Return any error condition that results in a short read such as >- * -%ENOSPC or -%EFAULT, even though these may be squashed before >- * returning to userspace. >- */ >- int (*read)(struct i915_perf_stream *stream, >- char __user *buf, >- size_t count, >- size_t *offset); >- >- /** >- * @destroy: Cleanup any stream specific resources. >- * >- * The stream will always be disabled before this is called. 
>- */ >- void (*destroy)(struct i915_perf_stream *stream); >-}; >- >-/** >- * struct i915_perf_stream - state for a single open stream FD >- */ >-struct i915_perf_stream { >- /** >- * @dev_priv: i915 drm device >- */ >- struct drm_i915_private *dev_priv; >- >- /** >- * @wakeref: As we keep the device awake while the perf stream is >- * active, we track our runtime pm reference for later release. >- */ >- intel_wakeref_t wakeref; >- >- /** >- * @engine: Engine associated with this performance stream. >- */ >- struct intel_engine_cs *engine; >- >- /** >- * @sample_flags: Flags representing the `DRM_I915_PERF_PROP_SAMPLE_*` >- * properties given when opening a stream, representing the contents >- * of a single sample as read() by userspace. >- */ >- u32 sample_flags; >- >- /** >- * @sample_size: Considering the configured contents of a sample >- * combined with the required header size, this is the total size >- * of a single sample record. >- */ >- int sample_size; >- >- /** >- * @ctx: %NULL if measuring system-wide across all contexts or a >- * specific context that is being monitored. >- */ >- struct i915_gem_context *ctx; >- >- /** >- * @enabled: Whether the stream is currently enabled, considering >- * whether the stream was opened in a disabled state and based >- * on `I915_PERF_IOCTL_ENABLE` and `I915_PERF_IOCTL_DISABLE` calls. >- */ >- bool enabled; >- >- /** >- * @ops: The callbacks providing the implementation of this specific >- * type of configured stream. >- */ >- const struct i915_perf_stream_ops *ops; >- >- /** >- * @oa_config: The OA configuration used by the stream. >- */ >- struct i915_oa_config *oa_config; >- >- /** >- * The OA context specific information. >- */ >- struct intel_context *pinned_ctx; >- u32 specific_ctx_id; >- u32 specific_ctx_id_mask; >- >- struct hrtimer poll_check_timer; >- wait_queue_head_t poll_wq; >- bool pollin; >- >- bool periodic; >- int period_exponent; >- >- /** >- * State of the OA buffer. 
>- */ >- struct { >- struct i915_vma *vma; >- u8 *vaddr; >- u32 last_ctx_id; >- int format; >- int format_size; >- int size_exponent; >- >- /** >- * Locks reads and writes to all head/tail state >- * >- * Consider: the head and tail pointer state needs to be read >- * consistently from a hrtimer callback (atomic context) and >- * read() fop (user context) with tail pointer updates happening >- * in atomic context and head updates in user context and the >- * (unlikely) possibility of read() errors needing to reset all >- * head/tail state. >- * >- * Note: Contention/performance aren't currently a significant >- * concern here considering the relatively low frequency of >- * hrtimer callbacks (5ms period) and that reads typically only >- * happen in response to a hrtimer event and likely complete >- * before the next callback. >- * >- * Note: This lock is not held *while* reading and copying data >- * to userspace so the value of head observed in htrimer >- * callbacks won't represent any partial consumption of data. >- */ >- spinlock_t ptr_lock; >- >- /** >- * One 'aging' tail pointer and one 'aged' tail pointer ready to >- * used for reading. >- * >- * Initial values of 0xffffffff are invalid and imply that an >- * update is required (and should be ignored by an attempted >- * read) >- */ >- struct { >- u32 offset; >- } tails[2]; >- >- /** >- * Index for the aged tail ready to read() data up to. >- */ >- unsigned int aged_tail_idx; >- >- /** >- * A monotonic timestamp for when the current aging tail pointer >- * was read; used to determine when it is old enough to trust. >- */ >- u64 aging_timestamp; >- >- /** >- * Although we can always read back the head pointer register, >- * we prefer to avoid trusting the HW state, just to avoid any >- * risk that some hardware condition could * somehow bump the >- * head pointer unpredictably and cause us to forward the wrong >- * OA buffer data to userspace. 
>- */ >- u32 head; >- } oa_buffer; >-}; >- >-/** >- * struct i915_oa_ops - Gen specific implementation of an OA unit stream >- */ >-struct i915_oa_ops { >- /** >- * @is_valid_b_counter_reg: Validates register's address for >- * programming boolean counters for a particular platform. >- */ >- bool (*is_valid_b_counter_reg)(struct drm_i915_private *dev_priv, >- u32 addr); >- >- /** >- * @is_valid_mux_reg: Validates register's address for programming mux >- * for a particular platform. >- */ >- bool (*is_valid_mux_reg)(struct drm_i915_private *dev_priv, u32 addr); >- >- /** >- * @is_valid_flex_reg: Validates register's address for programming >- * flex EU filtering for a particular platform. >- */ >- bool (*is_valid_flex_reg)(struct drm_i915_private *dev_priv, u32 addr); >- >- /** >- * @enable_metric_set: Selects and applies any MUX configuration to set >- * up the Boolean and Custom (B/C) counters that are part of the >- * counter reports being sampled. May apply system constraints such as >- * disabling EU clock gating as required. >- */ >- int (*enable_metric_set)(struct i915_perf_stream *stream); >- >- /** >- * @disable_metric_set: Remove system constraints associated with using >- * the OA unit. >- */ >- void (*disable_metric_set)(struct i915_perf_stream *stream); >- >- /** >- * @oa_enable: Enable periodic sampling >- */ >- void (*oa_enable)(struct i915_perf_stream *stream); >- >- /** >- * @oa_disable: Disable periodic sampling >- */ >- void (*oa_disable)(struct i915_perf_stream *stream); >- >- /** >- * @read: Copy data from the circular OA buffer into a given userspace >- * buffer. >- */ >- int (*read)(struct i915_perf_stream *stream, >- char __user *buf, >- size_t count, >- size_t *offset); >- >- /** >- * @oa_hw_tail_read: read the OA tail pointer register >- * >- * In particular this enables us to share all the fiddly code for >- * handling the OA unit tail pointer race that affects multiple >- * generations. 
>- */ >- u32 (*oa_hw_tail_read)(struct i915_perf_stream *stream); >-}; >- > struct intel_cdclk_state { > unsigned int cdclk, vco, ref, bypass; > u8 voltage_level; >diff --git a/drivers/gpu/drm/i915/i915_perf.h b/drivers/gpu/drm/i915/i915_perf.h >index 95549de65212..270193526b56 100644 >--- a/drivers/gpu/drm/i915/i915_perf.h >+++ b/drivers/gpu/drm/i915/i915_perf.h >@@ -8,6 +8,8 @@ > > #include <linux/types.h> > >+#include "i915_perf_types.h" >+ > struct drm_device; > struct drm_file; > struct drm_i915_private; >diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h >new file mode 100644 >index 000000000000..c6673b67d694 >--- /dev/null >+++ b/drivers/gpu/drm/i915/i915_perf_types.h >@@ -0,0 +1,327 @@ >+/* SPDX-License-Identifier: MIT */ >+/* >+ * Copyright © 2019 Intel Corporation >+ */ >+ >+#ifndef _I915_PERF_TYPES_H_ >+#define _I915_PERF_TYPES_H_ >+ >+#include <linux/atomic.h> >+#include <linux/device.h> >+#include <linux/hrtimer.h> >+#include <linux/list.h> >+#include <linux/poll.h> >+#include <linux/sysfs.h> >+#include <linux/uuid.h> >+#include <linux/wait.h> >+ >+#include "i915_reg.h" >+#include "intel_wakeref.h" >+ >+struct drm_i915_private; >+struct file; >+struct i915_gem_context; >+struct i915_vma; >+struct intel_context; >+struct intel_engine_cs; >+ >+struct i915_oa_format { >+ u32 format; >+ int size; >+}; >+ >+struct i915_oa_reg { >+ i915_reg_t addr; >+ u32 value; >+}; >+ >+struct i915_oa_config { >+ char uuid[UUID_STRING_LEN + 1]; >+ int id; >+ >+ const struct i915_oa_reg *mux_regs; >+ u32 mux_regs_len; >+ const struct i915_oa_reg *b_counter_regs; >+ u32 b_counter_regs_len; >+ const struct i915_oa_reg *flex_regs; >+ u32 flex_regs_len; >+ >+ struct attribute_group sysfs_metric; >+ struct attribute *attrs[2]; >+ struct device_attribute sysfs_metric_id; >+ >+ atomic_t ref_count; >+}; >+ >+struct i915_perf_stream; >+ >+/** >+ * struct i915_perf_stream_ops - the OPs to support a specific stream type >+ */ >+struct 
i915_perf_stream_ops { >+ /** >+ * @enable: Enables the collection of HW samples, either in response to >+ * `I915_PERF_IOCTL_ENABLE` or implicitly called when stream is opened >+ * without `I915_PERF_FLAG_DISABLED`. >+ */ >+ void (*enable)(struct i915_perf_stream *stream); >+ >+ /** >+ * @disable: Disables the collection of HW samples, either in response >+ * to `I915_PERF_IOCTL_DISABLE` or implicitly called before destroying >+ * the stream. >+ */ >+ void (*disable)(struct i915_perf_stream *stream); >+ >+ /** >+ * @poll_wait: Call poll_wait, passing a wait queue that will be woken >+ * once there is something ready to read() for the stream >+ */ >+ void (*poll_wait)(struct i915_perf_stream *stream, >+ struct file *file, >+ poll_table *wait); >+ >+ /** >+ * @wait_unlocked: For handling a blocking read, wait until there is >+ * something to ready to read() for the stream. E.g. wait on the same >+ * wait queue that would be passed to poll_wait(). >+ */ >+ int (*wait_unlocked)(struct i915_perf_stream *stream); >+ >+ /** >+ * @read: Copy buffered metrics as records to userspace >+ * **buf**: the userspace, destination buffer >+ * **count**: the number of bytes to copy, requested by userspace >+ * **offset**: zero at the start of the read, updated as the read >+ * proceeds, it represents how many bytes have been copied so far and >+ * the buffer offset for copying the next record. >+ * >+ * Copy as many buffered i915 perf samples and records for this stream >+ * to userspace as will fit in the given buffer. >+ * >+ * Only write complete records; returning -%ENOSPC if there isn't room >+ * for a complete record. >+ * >+ * Return any error condition that results in a short read such as >+ * -%ENOSPC or -%EFAULT, even though these may be squashed before >+ * returning to userspace. >+ */ >+ int (*read)(struct i915_perf_stream *stream, >+ char __user *buf, >+ size_t count, >+ size_t *offset); >+ >+ /** >+ * @destroy: Cleanup any stream specific resources. 
>+ * >+ * The stream will always be disabled before this is called. >+ */ >+ void (*destroy)(struct i915_perf_stream *stream); >+}; >+ >+/** >+ * struct i915_perf_stream - state for a single open stream FD >+ */ >+struct i915_perf_stream { >+ /** >+ * @dev_priv: i915 drm device >+ */ >+ struct drm_i915_private *dev_priv; >+ >+ /** >+ * @wakeref: As we keep the device awake while the perf stream is >+ * active, we track our runtime pm reference for later release. >+ */ >+ intel_wakeref_t wakeref; >+ >+ /** >+ * @engine: Engine associated with this performance stream. >+ */ >+ struct intel_engine_cs *engine; >+ >+ /** >+ * @sample_flags: Flags representing the `DRM_I915_PERF_PROP_SAMPLE_*` >+ * properties given when opening a stream, representing the contents >+ * of a single sample as read() by userspace. >+ */ >+ u32 sample_flags; >+ >+ /** >+ * @sample_size: Considering the configured contents of a sample >+ * combined with the required header size, this is the total size >+ * of a single sample record. >+ */ >+ int sample_size; >+ >+ /** >+ * @ctx: %NULL if measuring system-wide across all contexts or a >+ * specific context that is being monitored. >+ */ >+ struct i915_gem_context *ctx; >+ >+ /** >+ * @enabled: Whether the stream is currently enabled, considering >+ * whether the stream was opened in a disabled state and based >+ * on `I915_PERF_IOCTL_ENABLE` and `I915_PERF_IOCTL_DISABLE` calls. >+ */ >+ bool enabled; >+ >+ /** >+ * @ops: The callbacks providing the implementation of this specific >+ * type of configured stream. >+ */ >+ const struct i915_perf_stream_ops *ops; >+ >+ /** >+ * @oa_config: The OA configuration used by the stream. >+ */ >+ struct i915_oa_config *oa_config; >+ >+ /** >+ * The OA context specific information. 
>+ */ >+ struct intel_context *pinned_ctx; >+ u32 specific_ctx_id; >+ u32 specific_ctx_id_mask; >+ >+ struct hrtimer poll_check_timer; >+ wait_queue_head_t poll_wq; >+ bool pollin; >+ >+ bool periodic; >+ int period_exponent; >+ >+ /** >+ * State of the OA buffer. >+ */ >+ struct { >+ struct i915_vma *vma; >+ u8 *vaddr; >+ u32 last_ctx_id; >+ int format; >+ int format_size; >+ int size_exponent; >+ >+ /** >+ * Locks reads and writes to all head/tail state >+ * >+ * Consider: the head and tail pointer state needs to be read >+ * consistently from a hrtimer callback (atomic context) and >+ * read() fop (user context) with tail pointer updates happening >+ * in atomic context and head updates in user context and the >+ * (unlikely) possibility of read() errors needing to reset all >+ * head/tail state. >+ * >+ * Note: Contention/performance aren't currently a significant >+ * concern here considering the relatively low frequency of >+ * hrtimer callbacks (5ms period) and that reads typically only >+ * happen in response to a hrtimer event and likely complete >+ * before the next callback. >+ * >+ * Note: This lock is not held *while* reading and copying data >+ * to userspace so the value of head observed in htrimer >+ * callbacks won't represent any partial consumption of data. >+ */ >+ spinlock_t ptr_lock; >+ >+ /** >+ * One 'aging' tail pointer and one 'aged' tail pointer ready to >+ * used for reading. >+ * >+ * Initial values of 0xffffffff are invalid and imply that an >+ * update is required (and should be ignored by an attempted >+ * read) >+ */ >+ struct { >+ u32 offset; >+ } tails[2]; >+ >+ /** >+ * Index for the aged tail ready to read() data up to. >+ */ >+ unsigned int aged_tail_idx; >+ >+ /** >+ * A monotonic timestamp for when the current aging tail pointer >+ * was read; used to determine when it is old enough to trust. 
>+ */ >+ u64 aging_timestamp; >+ >+ /** >+ * Although we can always read back the head pointer register, >+ * we prefer to avoid trusting the HW state, just to avoid any >+ * risk that some hardware condition could * somehow bump the >+ * head pointer unpredictably and cause us to forward the wrong >+ * OA buffer data to userspace. >+ */ >+ u32 head; >+ } oa_buffer; >+}; >+ >+/** >+ * struct i915_oa_ops - Gen specific implementation of an OA unit stream >+ */ >+struct i915_oa_ops { >+ /** >+ * @is_valid_b_counter_reg: Validates register's address for >+ * programming boolean counters for a particular platform. >+ */ >+ bool (*is_valid_b_counter_reg)(struct drm_i915_private *dev_priv, >+ u32 addr); >+ >+ /** >+ * @is_valid_mux_reg: Validates register's address for programming mux >+ * for a particular platform. >+ */ >+ bool (*is_valid_mux_reg)(struct drm_i915_private *dev_priv, u32 addr); >+ >+ /** >+ * @is_valid_flex_reg: Validates register's address for programming >+ * flex EU filtering for a particular platform. >+ */ >+ bool (*is_valid_flex_reg)(struct drm_i915_private *dev_priv, u32 addr); >+ >+ /** >+ * @enable_metric_set: Selects and applies any MUX configuration to set >+ * up the Boolean and Custom (B/C) counters that are part of the >+ * counter reports being sampled. May apply system constraints such as >+ * disabling EU clock gating as required. >+ */ >+ int (*enable_metric_set)(struct i915_perf_stream *stream); >+ >+ /** >+ * @disable_metric_set: Remove system constraints associated with using >+ * the OA unit. >+ */ >+ void (*disable_metric_set)(struct i915_perf_stream *stream); >+ >+ /** >+ * @oa_enable: Enable periodic sampling >+ */ >+ void (*oa_enable)(struct i915_perf_stream *stream); >+ >+ /** >+ * @oa_disable: Disable periodic sampling >+ */ >+ void (*oa_disable)(struct i915_perf_stream *stream); >+ >+ /** >+ * @read: Copy data from the circular OA buffer into a given userspace >+ * buffer. 
>+ */ >+ int (*read)(struct i915_perf_stream *stream, >+ char __user *buf, >+ size_t count, >+ size_t *offset); >+ >+ /** >+ * @oa_hw_tail_read: read the OA tail pointer register >+ * >+ * In particular this enables us to share all the fiddly code for >+ * handling the OA unit tail pointer race that affects multiple >+ * generations. >+ */ >+ u32 (*oa_hw_tail_read)(struct i915_perf_stream *stream); >+}; >+ >+#endif /* _I915_PERF_TYPES_H_ */ >-- >2.23.0 > >_______________________________________________ >Intel-gfx mailing list >Intel-gfx@lists.freedesktop.org >https://lists.freedesktop.org/mailman/listinfo/intel-gfx
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 274a1193d4f0..f4145ae6ab6e 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -92,6 +92,7 @@ #include "i915_gem_fence_reg.h" #include "i915_gem_gtt.h" #include "i915_gpu_error.h" +#include "i915_perf_types.h" #include "i915_request.h" #include "i915_scheduler.h" #include "gt/intel_timeline.h" @@ -979,305 +980,6 @@ struct intel_wm_config { bool sprites_scaled; }; -struct i915_oa_format { - u32 format; - int size; -}; - -struct i915_oa_reg { - i915_reg_t addr; - u32 value; -}; - -struct i915_oa_config { - char uuid[UUID_STRING_LEN + 1]; - int id; - - const struct i915_oa_reg *mux_regs; - u32 mux_regs_len; - const struct i915_oa_reg *b_counter_regs; - u32 b_counter_regs_len; - const struct i915_oa_reg *flex_regs; - u32 flex_regs_len; - - struct attribute_group sysfs_metric; - struct attribute *attrs[2]; - struct device_attribute sysfs_metric_id; - - atomic_t ref_count; -}; - -struct i915_perf_stream; - -/** - * struct i915_perf_stream_ops - the OPs to support a specific stream type - */ -struct i915_perf_stream_ops { - /** - * @enable: Enables the collection of HW samples, either in response to - * `I915_PERF_IOCTL_ENABLE` or implicitly called when stream is opened - * without `I915_PERF_FLAG_DISABLED`. - */ - void (*enable)(struct i915_perf_stream *stream); - - /** - * @disable: Disables the collection of HW samples, either in response - * to `I915_PERF_IOCTL_DISABLE` or implicitly called before destroying - * the stream. - */ - void (*disable)(struct i915_perf_stream *stream); - - /** - * @poll_wait: Call poll_wait, passing a wait queue that will be woken - * once there is something ready to read() for the stream - */ - void (*poll_wait)(struct i915_perf_stream *stream, - struct file *file, - poll_table *wait); - - /** - * @wait_unlocked: For handling a blocking read, wait until there is - * something to ready to read() for the stream. E.g. 
wait on the same - * wait queue that would be passed to poll_wait(). - */ - int (*wait_unlocked)(struct i915_perf_stream *stream); - - /** - * @read: Copy buffered metrics as records to userspace - * **buf**: the userspace, destination buffer - * **count**: the number of bytes to copy, requested by userspace - * **offset**: zero at the start of the read, updated as the read - * proceeds, it represents how many bytes have been copied so far and - * the buffer offset for copying the next record. - * - * Copy as many buffered i915 perf samples and records for this stream - * to userspace as will fit in the given buffer. - * - * Only write complete records; returning -%ENOSPC if there isn't room - * for a complete record. - * - * Return any error condition that results in a short read such as - * -%ENOSPC or -%EFAULT, even though these may be squashed before - * returning to userspace. - */ - int (*read)(struct i915_perf_stream *stream, - char __user *buf, - size_t count, - size_t *offset); - - /** - * @destroy: Cleanup any stream specific resources. - * - * The stream will always be disabled before this is called. - */ - void (*destroy)(struct i915_perf_stream *stream); -}; - -/** - * struct i915_perf_stream - state for a single open stream FD - */ -struct i915_perf_stream { - /** - * @dev_priv: i915 drm device - */ - struct drm_i915_private *dev_priv; - - /** - * @wakeref: As we keep the device awake while the perf stream is - * active, we track our runtime pm reference for later release. - */ - intel_wakeref_t wakeref; - - /** - * @engine: Engine associated with this performance stream. - */ - struct intel_engine_cs *engine; - - /** - * @sample_flags: Flags representing the `DRM_I915_PERF_PROP_SAMPLE_*` - * properties given when opening a stream, representing the contents - * of a single sample as read() by userspace. 
- */ - u32 sample_flags; - - /** - * @sample_size: Considering the configured contents of a sample - * combined with the required header size, this is the total size - * of a single sample record. - */ - int sample_size; - - /** - * @ctx: %NULL if measuring system-wide across all contexts or a - * specific context that is being monitored. - */ - struct i915_gem_context *ctx; - - /** - * @enabled: Whether the stream is currently enabled, considering - * whether the stream was opened in a disabled state and based - * on `I915_PERF_IOCTL_ENABLE` and `I915_PERF_IOCTL_DISABLE` calls. - */ - bool enabled; - - /** - * @ops: The callbacks providing the implementation of this specific - * type of configured stream. - */ - const struct i915_perf_stream_ops *ops; - - /** - * @oa_config: The OA configuration used by the stream. - */ - struct i915_oa_config *oa_config; - - /** - * The OA context specific information. - */ - struct intel_context *pinned_ctx; - u32 specific_ctx_id; - u32 specific_ctx_id_mask; - - struct hrtimer poll_check_timer; - wait_queue_head_t poll_wq; - bool pollin; - - bool periodic; - int period_exponent; - - /** - * State of the OA buffer. - */ - struct { - struct i915_vma *vma; - u8 *vaddr; - u32 last_ctx_id; - int format; - int format_size; - int size_exponent; - - /** - * Locks reads and writes to all head/tail state - * - * Consider: the head and tail pointer state needs to be read - * consistently from a hrtimer callback (atomic context) and - * read() fop (user context) with tail pointer updates happening - * in atomic context and head updates in user context and the - * (unlikely) possibility of read() errors needing to reset all - * head/tail state. - * - * Note: Contention/performance aren't currently a significant - * concern here considering the relatively low frequency of - * hrtimer callbacks (5ms period) and that reads typically only - * happen in response to a hrtimer event and likely complete - * before the next callback. 
- * - * Note: This lock is not held *while* reading and copying data - * to userspace so the value of head observed in htrimer - * callbacks won't represent any partial consumption of data. - */ - spinlock_t ptr_lock; - - /** - * One 'aging' tail pointer and one 'aged' tail pointer ready to - * used for reading. - * - * Initial values of 0xffffffff are invalid and imply that an - * update is required (and should be ignored by an attempted - * read) - */ - struct { - u32 offset; - } tails[2]; - - /** - * Index for the aged tail ready to read() data up to. - */ - unsigned int aged_tail_idx; - - /** - * A monotonic timestamp for when the current aging tail pointer - * was read; used to determine when it is old enough to trust. - */ - u64 aging_timestamp; - - /** - * Although we can always read back the head pointer register, - * we prefer to avoid trusting the HW state, just to avoid any - * risk that some hardware condition could * somehow bump the - * head pointer unpredictably and cause us to forward the wrong - * OA buffer data to userspace. - */ - u32 head; - } oa_buffer; -}; - -/** - * struct i915_oa_ops - Gen specific implementation of an OA unit stream - */ -struct i915_oa_ops { - /** - * @is_valid_b_counter_reg: Validates register's address for - * programming boolean counters for a particular platform. - */ - bool (*is_valid_b_counter_reg)(struct drm_i915_private *dev_priv, - u32 addr); - - /** - * @is_valid_mux_reg: Validates register's address for programming mux - * for a particular platform. - */ - bool (*is_valid_mux_reg)(struct drm_i915_private *dev_priv, u32 addr); - - /** - * @is_valid_flex_reg: Validates register's address for programming - * flex EU filtering for a particular platform. - */ - bool (*is_valid_flex_reg)(struct drm_i915_private *dev_priv, u32 addr); - - /** - * @enable_metric_set: Selects and applies any MUX configuration to set - * up the Boolean and Custom (B/C) counters that are part of the - * counter reports being sampled. 
May apply system constraints such as - * disabling EU clock gating as required. - */ - int (*enable_metric_set)(struct i915_perf_stream *stream); - - /** - * @disable_metric_set: Remove system constraints associated with using - * the OA unit. - */ - void (*disable_metric_set)(struct i915_perf_stream *stream); - - /** - * @oa_enable: Enable periodic sampling - */ - void (*oa_enable)(struct i915_perf_stream *stream); - - /** - * @oa_disable: Disable periodic sampling - */ - void (*oa_disable)(struct i915_perf_stream *stream); - - /** - * @read: Copy data from the circular OA buffer into a given userspace - * buffer. - */ - int (*read)(struct i915_perf_stream *stream, - char __user *buf, - size_t count, - size_t *offset); - - /** - * @oa_hw_tail_read: read the OA tail pointer register - * - * In particular this enables us to share all the fiddly code for - * handling the OA unit tail pointer race that affects multiple - * generations. - */ - u32 (*oa_hw_tail_read)(struct i915_perf_stream *stream); -}; - struct intel_cdclk_state { unsigned int cdclk, vco, ref, bypass; u8 voltage_level; diff --git a/drivers/gpu/drm/i915/i915_perf.h b/drivers/gpu/drm/i915/i915_perf.h index 95549de65212..270193526b56 100644 --- a/drivers/gpu/drm/i915/i915_perf.h +++ b/drivers/gpu/drm/i915/i915_perf.h @@ -8,6 +8,8 @@ #include <linux/types.h> +#include "i915_perf_types.h" + struct drm_device; struct drm_file; struct drm_i915_private; diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h new file mode 100644 index 000000000000..c6673b67d694 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_perf_types.h @@ -0,0 +1,327 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef _I915_PERF_TYPES_H_ +#define _I915_PERF_TYPES_H_ + +#include <linux/atomic.h> +#include <linux/device.h> +#include <linux/hrtimer.h> +#include <linux/list.h> +#include <linux/poll.h> +#include <linux/sysfs.h> +#include <linux/uuid.h> +#include 
<linux/wait.h> + +#include "i915_reg.h" +#include "intel_wakeref.h" + +struct drm_i915_private; +struct file; +struct i915_gem_context; +struct i915_vma; +struct intel_context; +struct intel_engine_cs; + +struct i915_oa_format { + u32 format; + int size; +}; + +struct i915_oa_reg { + i915_reg_t addr; + u32 value; +}; + +struct i915_oa_config { + char uuid[UUID_STRING_LEN + 1]; + int id; + + const struct i915_oa_reg *mux_regs; + u32 mux_regs_len; + const struct i915_oa_reg *b_counter_regs; + u32 b_counter_regs_len; + const struct i915_oa_reg *flex_regs; + u32 flex_regs_len; + + struct attribute_group sysfs_metric; + struct attribute *attrs[2]; + struct device_attribute sysfs_metric_id; + + atomic_t ref_count; +}; + +struct i915_perf_stream; + +/** + * struct i915_perf_stream_ops - the OPs to support a specific stream type + */ +struct i915_perf_stream_ops { + /** + * @enable: Enables the collection of HW samples, either in response to + * `I915_PERF_IOCTL_ENABLE` or implicitly called when stream is opened + * without `I915_PERF_FLAG_DISABLED`. + */ + void (*enable)(struct i915_perf_stream *stream); + + /** + * @disable: Disables the collection of HW samples, either in response + * to `I915_PERF_IOCTL_DISABLE` or implicitly called before destroying + * the stream. + */ + void (*disable)(struct i915_perf_stream *stream); + + /** + * @poll_wait: Call poll_wait, passing a wait queue that will be woken + * once there is something ready to read() for the stream + */ + void (*poll_wait)(struct i915_perf_stream *stream, + struct file *file, + poll_table *wait); + + /** + * @wait_unlocked: For handling a blocking read, wait until there is + * something to ready to read() for the stream. E.g. wait on the same + * wait queue that would be passed to poll_wait(). 
+ */ + int (*wait_unlocked)(struct i915_perf_stream *stream); + + /** + * @read: Copy buffered metrics as records to userspace + * **buf**: the userspace, destination buffer + * **count**: the number of bytes to copy, requested by userspace + * **offset**: zero at the start of the read, updated as the read + * proceeds, it represents how many bytes have been copied so far and + * the buffer offset for copying the next record. + * + * Copy as many buffered i915 perf samples and records for this stream + * to userspace as will fit in the given buffer. + * + * Only write complete records; returning -%ENOSPC if there isn't room + * for a complete record. + * + * Return any error condition that results in a short read such as + * -%ENOSPC or -%EFAULT, even though these may be squashed before + * returning to userspace. + */ + int (*read)(struct i915_perf_stream *stream, + char __user *buf, + size_t count, + size_t *offset); + + /** + * @destroy: Cleanup any stream specific resources. + * + * The stream will always be disabled before this is called. + */ + void (*destroy)(struct i915_perf_stream *stream); +}; + +/** + * struct i915_perf_stream - state for a single open stream FD + */ +struct i915_perf_stream { + /** + * @dev_priv: i915 drm device + */ + struct drm_i915_private *dev_priv; + + /** + * @wakeref: As we keep the device awake while the perf stream is + * active, we track our runtime pm reference for later release. + */ + intel_wakeref_t wakeref; + + /** + * @engine: Engine associated with this performance stream. + */ + struct intel_engine_cs *engine; + + /** + * @sample_flags: Flags representing the `DRM_I915_PERF_PROP_SAMPLE_*` + * properties given when opening a stream, representing the contents + * of a single sample as read() by userspace. + */ + u32 sample_flags; + + /** + * @sample_size: Considering the configured contents of a sample + * combined with the required header size, this is the total size + * of a single sample record. 
+ */ + int sample_size; + + /** + * @ctx: %NULL if measuring system-wide across all contexts or a + * specific context that is being monitored. + */ + struct i915_gem_context *ctx; + + /** + * @enabled: Whether the stream is currently enabled, considering + * whether the stream was opened in a disabled state and based + * on `I915_PERF_IOCTL_ENABLE` and `I915_PERF_IOCTL_DISABLE` calls. + */ + bool enabled; + + /** + * @ops: The callbacks providing the implementation of this specific + * type of configured stream. + */ + const struct i915_perf_stream_ops *ops; + + /** + * @oa_config: The OA configuration used by the stream. + */ + struct i915_oa_config *oa_config; + + /** + * The OA context specific information. + */ + struct intel_context *pinned_ctx; + u32 specific_ctx_id; + u32 specific_ctx_id_mask; + + struct hrtimer poll_check_timer; + wait_queue_head_t poll_wq; + bool pollin; + + bool periodic; + int period_exponent; + + /** + * State of the OA buffer. + */ + struct { + struct i915_vma *vma; + u8 *vaddr; + u32 last_ctx_id; + int format; + int format_size; + int size_exponent; + + /** + * Locks reads and writes to all head/tail state + * + * Consider: the head and tail pointer state needs to be read + * consistently from a hrtimer callback (atomic context) and + * read() fop (user context) with tail pointer updates happening + * in atomic context and head updates in user context and the + * (unlikely) possibility of read() errors needing to reset all + * head/tail state. + * + * Note: Contention/performance aren't currently a significant + * concern here considering the relatively low frequency of + * hrtimer callbacks (5ms period) and that reads typically only + * happen in response to a hrtimer event and likely complete + * before the next callback. + * + * Note: This lock is not held *while* reading and copying data + * to userspace so the value of head observed in hrtimer + * callbacks won't represent any partial consumption of data. 
+ */ + spinlock_t ptr_lock; + + /** + * One 'aging' tail pointer and one 'aged' tail pointer ready to + * be used for reading. + * + * Initial values of 0xffffffff are invalid and imply that an + * update is required (and should be ignored by an attempted + * read) + */ + struct { + u32 offset; + } tails[2]; + + /** + * Index for the aged tail ready to read() data up to. + */ + unsigned int aged_tail_idx; + + /** + * A monotonic timestamp for when the current aging tail pointer + * was read; used to determine when it is old enough to trust. + */ + u64 aging_timestamp; + + /** + * Although we can always read back the head pointer register, + * we prefer to avoid trusting the HW state, just to avoid any + * risk that some hardware condition could somehow bump the + * head pointer unpredictably and cause us to forward the wrong + * OA buffer data to userspace. + */ + u32 head; + } oa_buffer; +}; + +/** + * struct i915_oa_ops - Gen specific implementation of an OA unit stream + */ +struct i915_oa_ops { + /** + * @is_valid_b_counter_reg: Validates register's address for + * programming boolean counters for a particular platform. + */ + bool (*is_valid_b_counter_reg)(struct drm_i915_private *dev_priv, + u32 addr); + + /** + * @is_valid_mux_reg: Validates register's address for programming mux + * for a particular platform. + */ + bool (*is_valid_mux_reg)(struct drm_i915_private *dev_priv, u32 addr); + + /** + * @is_valid_flex_reg: Validates register's address for programming + * flex EU filtering for a particular platform. + */ + bool (*is_valid_flex_reg)(struct drm_i915_private *dev_priv, u32 addr); + + /** + * @enable_metric_set: Selects and applies any MUX configuration to set + * up the Boolean and Custom (B/C) counters that are part of the + * counter reports being sampled. May apply system constraints such as + * disabling EU clock gating as required. 
+ */ + int (*enable_metric_set)(struct i915_perf_stream *stream); + + /** + * @disable_metric_set: Remove system constraints associated with using + * the OA unit. + */ + void (*disable_metric_set)(struct i915_perf_stream *stream); + + /** + * @oa_enable: Enable periodic sampling + */ + void (*oa_enable)(struct i915_perf_stream *stream); + + /** + * @oa_disable: Disable periodic sampling + */ + void (*oa_disable)(struct i915_perf_stream *stream); + + /** + * @read: Copy data from the circular OA buffer into a given userspace + * buffer. + */ + int (*read)(struct i915_perf_stream *stream, + char __user *buf, + size_t count, + size_t *offset); + + /** + * @oa_hw_tail_read: read the OA tail pointer register + * + * In particular this enables us to share all the fiddly code for + * handling the OA unit tail pointer race that affects multiple + * generations. + */ + u32 (*oa_hw_tail_read)(struct i915_perf_stream *stream); +}; + +#endif /* _I915_PERF_TYPES_H_ */
Following a pattern used throughout the driver. Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> --- drivers/gpu/drm/i915/i915_drv.h | 300 +---------------------- drivers/gpu/drm/i915/i915_perf.h | 2 + drivers/gpu/drm/i915/i915_perf_types.h | 327 +++++++++++++++++++++++++ 3 files changed, 330 insertions(+), 299 deletions(-) create mode 100644 drivers/gpu/drm/i915/i915_perf_types.h