@@ -92,6 +92,7 @@
#include "i915_gem_fence_reg.h"
#include "i915_gem_gtt.h"
#include "i915_gpu_error.h"
+#include "i915_perf_types.h"
#include "i915_request.h"
#include "i915_scheduler.h"
#include "gt/intel_timeline.h"
@@ -979,305 +980,6 @@ struct intel_wm_config {
bool sprites_scaled;
};
-struct i915_oa_format {
- u32 format;
- int size;
-};
-
-struct i915_oa_reg {
- i915_reg_t addr;
- u32 value;
-};
-
-struct i915_oa_config {
- char uuid[UUID_STRING_LEN + 1];
- int id;
-
- const struct i915_oa_reg *mux_regs;
- u32 mux_regs_len;
- const struct i915_oa_reg *b_counter_regs;
- u32 b_counter_regs_len;
- const struct i915_oa_reg *flex_regs;
- u32 flex_regs_len;
-
- struct attribute_group sysfs_metric;
- struct attribute *attrs[2];
- struct device_attribute sysfs_metric_id;
-
- atomic_t ref_count;
-};
-
-struct i915_perf_stream;
-
-/**
- * struct i915_perf_stream_ops - the OPs to support a specific stream type
- */
-struct i915_perf_stream_ops {
- /**
- * @enable: Enables the collection of HW samples, either in response to
- * `I915_PERF_IOCTL_ENABLE` or implicitly called when stream is opened
- * without `I915_PERF_FLAG_DISABLED`.
- */
- void (*enable)(struct i915_perf_stream *stream);
-
- /**
- * @disable: Disables the collection of HW samples, either in response
- * to `I915_PERF_IOCTL_DISABLE` or implicitly called before destroying
- * the stream.
- */
- void (*disable)(struct i915_perf_stream *stream);
-
- /**
- * @poll_wait: Call poll_wait, passing a wait queue that will be woken
- * once there is something ready to read() for the stream
- */
- void (*poll_wait)(struct i915_perf_stream *stream,
- struct file *file,
- poll_table *wait);
-
- /**
- * @wait_unlocked: For handling a blocking read, wait until there is
- * something to ready to read() for the stream. E.g. wait on the same
- * wait queue that would be passed to poll_wait().
- */
- int (*wait_unlocked)(struct i915_perf_stream *stream);
-
- /**
- * @read: Copy buffered metrics as records to userspace
- * **buf**: the userspace, destination buffer
- * **count**: the number of bytes to copy, requested by userspace
- * **offset**: zero at the start of the read, updated as the read
- * proceeds, it represents how many bytes have been copied so far and
- * the buffer offset for copying the next record.
- *
- * Copy as many buffered i915 perf samples and records for this stream
- * to userspace as will fit in the given buffer.
- *
- * Only write complete records; returning -%ENOSPC if there isn't room
- * for a complete record.
- *
- * Return any error condition that results in a short read such as
- * -%ENOSPC or -%EFAULT, even though these may be squashed before
- * returning to userspace.
- */
- int (*read)(struct i915_perf_stream *stream,
- char __user *buf,
- size_t count,
- size_t *offset);
-
- /**
- * @destroy: Cleanup any stream specific resources.
- *
- * The stream will always be disabled before this is called.
- */
- void (*destroy)(struct i915_perf_stream *stream);
-};
-
-/**
- * struct i915_perf_stream - state for a single open stream FD
- */
-struct i915_perf_stream {
- /**
- * @dev_priv: i915 drm device
- */
- struct drm_i915_private *dev_priv;
-
- /**
- * @wakeref: As we keep the device awake while the perf stream is
- * active, we track our runtime pm reference for later release.
- */
- intel_wakeref_t wakeref;
-
- /**
- * @engine: Engine associated with this performance stream.
- */
- struct intel_engine_cs *engine;
-
- /**
- * @sample_flags: Flags representing the `DRM_I915_PERF_PROP_SAMPLE_*`
- * properties given when opening a stream, representing the contents
- * of a single sample as read() by userspace.
- */
- u32 sample_flags;
-
- /**
- * @sample_size: Considering the configured contents of a sample
- * combined with the required header size, this is the total size
- * of a single sample record.
- */
- int sample_size;
-
- /**
- * @ctx: %NULL if measuring system-wide across all contexts or a
- * specific context that is being monitored.
- */
- struct i915_gem_context *ctx;
-
- /**
- * @enabled: Whether the stream is currently enabled, considering
- * whether the stream was opened in a disabled state and based
- * on `I915_PERF_IOCTL_ENABLE` and `I915_PERF_IOCTL_DISABLE` calls.
- */
- bool enabled;
-
- /**
- * @ops: The callbacks providing the implementation of this specific
- * type of configured stream.
- */
- const struct i915_perf_stream_ops *ops;
-
- /**
- * @oa_config: The OA configuration used by the stream.
- */
- struct i915_oa_config *oa_config;
-
- /**
- * The OA context specific information.
- */
- struct intel_context *pinned_ctx;
- u32 specific_ctx_id;
- u32 specific_ctx_id_mask;
-
- struct hrtimer poll_check_timer;
- wait_queue_head_t poll_wq;
- bool pollin;
-
- bool periodic;
- int period_exponent;
-
- /**
- * State of the OA buffer.
- */
- struct {
- struct i915_vma *vma;
- u8 *vaddr;
- u32 last_ctx_id;
- int format;
- int format_size;
- int size_exponent;
-
- /**
- * Locks reads and writes to all head/tail state
- *
- * Consider: the head and tail pointer state needs to be read
- * consistently from a hrtimer callback (atomic context) and
- * read() fop (user context) with tail pointer updates happening
- * in atomic context and head updates in user context and the
- * (unlikely) possibility of read() errors needing to reset all
- * head/tail state.
- *
- * Note: Contention/performance aren't currently a significant
- * concern here considering the relatively low frequency of
- * hrtimer callbacks (5ms period) and that reads typically only
- * happen in response to a hrtimer event and likely complete
- * before the next callback.
- *
- * Note: This lock is not held *while* reading and copying data
- * to userspace so the value of head observed in htrimer
- * callbacks won't represent any partial consumption of data.
- */
- spinlock_t ptr_lock;
-
- /**
- * One 'aging' tail pointer and one 'aged' tail pointer ready to
- * used for reading.
- *
- * Initial values of 0xffffffff are invalid and imply that an
- * update is required (and should be ignored by an attempted
- * read)
- */
- struct {
- u32 offset;
- } tails[2];
-
- /**
- * Index for the aged tail ready to read() data up to.
- */
- unsigned int aged_tail_idx;
-
- /**
- * A monotonic timestamp for when the current aging tail pointer
- * was read; used to determine when it is old enough to trust.
- */
- u64 aging_timestamp;
-
- /**
- * Although we can always read back the head pointer register,
- * we prefer to avoid trusting the HW state, just to avoid any
- * risk that some hardware condition could * somehow bump the
- * head pointer unpredictably and cause us to forward the wrong
- * OA buffer data to userspace.
- */
- u32 head;
- } oa_buffer;
-};
-
-/**
- * struct i915_oa_ops - Gen specific implementation of an OA unit stream
- */
-struct i915_oa_ops {
- /**
- * @is_valid_b_counter_reg: Validates register's address for
- * programming boolean counters for a particular platform.
- */
- bool (*is_valid_b_counter_reg)(struct drm_i915_private *dev_priv,
- u32 addr);
-
- /**
- * @is_valid_mux_reg: Validates register's address for programming mux
- * for a particular platform.
- */
- bool (*is_valid_mux_reg)(struct drm_i915_private *dev_priv, u32 addr);
-
- /**
- * @is_valid_flex_reg: Validates register's address for programming
- * flex EU filtering for a particular platform.
- */
- bool (*is_valid_flex_reg)(struct drm_i915_private *dev_priv, u32 addr);
-
- /**
- * @enable_metric_set: Selects and applies any MUX configuration to set
- * up the Boolean and Custom (B/C) counters that are part of the
- * counter reports being sampled. May apply system constraints such as
- * disabling EU clock gating as required.
- */
- int (*enable_metric_set)(struct i915_perf_stream *stream);
-
- /**
- * @disable_metric_set: Remove system constraints associated with using
- * the OA unit.
- */
- void (*disable_metric_set)(struct i915_perf_stream *stream);
-
- /**
- * @oa_enable: Enable periodic sampling
- */
- void (*oa_enable)(struct i915_perf_stream *stream);
-
- /**
- * @oa_disable: Disable periodic sampling
- */
- void (*oa_disable)(struct i915_perf_stream *stream);
-
- /**
- * @read: Copy data from the circular OA buffer into a given userspace
- * buffer.
- */
- int (*read)(struct i915_perf_stream *stream,
- char __user *buf,
- size_t count,
- size_t *offset);
-
- /**
- * @oa_hw_tail_read: read the OA tail pointer register
- *
- * In particular this enables us to share all the fiddly code for
- * handling the OA unit tail pointer race that affects multiple
- * generations.
- */
- u32 (*oa_hw_tail_read)(struct i915_perf_stream *stream);
-};
-
struct intel_cdclk_state {
unsigned int cdclk, vco, ref, bypass;
u8 voltage_level;
@@ -8,6 +8,8 @@
#include <linux/types.h>
+#include "i915_perf_types.h"
+
struct drm_device;
struct drm_file;
struct drm_i915_private;
new file mode 100644
@@ -0,0 +1,318 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef _I915_PERF_TYPES_H_
+#define _I915_PERF_TYPES_H_
+
+#include <linux/atomic.h>
+#include <linux/device.h>
+#include <linux/hrtimer.h>
+#include <linux/poll.h>
+#include <linux/sysfs.h>
+#include <linux/types.h>
+#include <linux/uuid.h>
+#include <linux/wait.h>
+
+#include "i915_reg.h"
+#include "intel_wakeref.h"
+
+struct drm_i915_private;
+struct file;
+
+struct i915_oa_format { /* a (format, size) pair; TODO confirm exact meaning against its users */
+ u32 format;
+ int size; /* NOTE(review): presumably the size of one report in this format - confirm */
+};
+
+struct i915_oa_reg { /* one register address paired with a 32-bit value */
+ i915_reg_t addr;
+ u32 value; /* NOTE(review): presumably the value to program at addr - confirm */
+};
+
+struct i915_oa_config { /* an OA configuration: three register lists plus sysfs and refcount state */
+ char uuid[UUID_STRING_LEN + 1]; /* UUID string; the +1 is presumably room for the NUL - confirm */
+ int id;
+
+ const struct i915_oa_reg *mux_regs; /* mux registers (cf. is_valid_mux_reg) */
+ u32 mux_regs_len; /* NOTE(review): presumably the entry count of mux_regs - confirm */
+ const struct i915_oa_reg *b_counter_regs; /* boolean counter registers (cf. is_valid_b_counter_reg) */
+ u32 b_counter_regs_len; /* NOTE(review): presumably the entry count of b_counter_regs - confirm */
+ const struct i915_oa_reg *flex_regs; /* flex EU registers (cf. is_valid_flex_reg) */
+ u32 flex_regs_len; /* NOTE(review): presumably the entry count of flex_regs - confirm */
+
+ struct attribute_group sysfs_metric; /* sysfs state; how these three fields are wired up is not visible here */
+ struct attribute *attrs[2];
+ struct device_attribute sysfs_metric_id;
+
+ atomic_t ref_count; /* NOTE(review): presumably counts users of this config - confirm */
+};
+
+struct i915_perf_stream;
+
+/**
+ * struct i915_perf_stream_ops - the OPs to support a specific stream type
+ *
+ * Every callback is passed the &struct i915_perf_stream it acts on as its
+ * first argument.
+ */
+struct i915_perf_stream_ops {
+ /**
+ * @enable: Enables the collection of HW samples, either in response to
+ * `I915_PERF_IOCTL_ENABLE` or implicitly called when stream is opened
+ * without `I915_PERF_FLAG_DISABLED`.
+ */
+ void (*enable)(struct i915_perf_stream *stream);
+
+ /**
+ * @disable: Disables the collection of HW samples, either in response
+ * to `I915_PERF_IOCTL_DISABLE` or implicitly called before destroying
+ * the stream.
+ */
+ void (*disable)(struct i915_perf_stream *stream);
+
+ /**
+ * @poll_wait: Call poll_wait, passing a wait queue that will be woken
+ * once there is something ready to read() for the stream.
+ */
+ void (*poll_wait)(struct i915_perf_stream *stream,
+ struct file *file,
+ poll_table *wait);
+
+ /**
+ * @wait_unlocked: For handling a blocking read, wait until there is
+ * something ready to read() for the stream. E.g. wait on the same
+ * wait queue that would be passed to poll_wait().
+ */
+ int (*wait_unlocked)(struct i915_perf_stream *stream);
+
+ /**
+ * @read: Copy buffered metrics as records to userspace
+ * **buf**: the userspace, destination buffer
+ * **count**: the number of bytes to copy, requested by userspace
+ * **offset**: zero at the start of the read, updated as the read
+ * proceeds, it represents how many bytes have been copied so far and
+ * the buffer offset for copying the next record.
+ *
+ * Copy as many buffered i915 perf samples and records for this stream
+ * to userspace as will fit in the given buffer.
+ *
+ * Only write complete records; returning -%ENOSPC if there isn't room
+ * for a complete record.
+ *
+ * Return any error condition that results in a short read such as
+ * -%ENOSPC or -%EFAULT, even though these may be squashed before
+ * returning to userspace.
+ */
+ int (*read)(struct i915_perf_stream *stream,
+ char __user *buf,
+ size_t count,
+ size_t *offset);
+
+ /**
+ * @destroy: Cleanup any stream specific resources.
+ *
+ * The stream will always be disabled before this is called.
+ */
+ void (*destroy)(struct i915_perf_stream *stream);
+};
+
+/**
+ * struct i915_perf_stream - state for a single open stream FD
+ */
+struct i915_perf_stream {
+ /**
+ * @dev_priv: i915 drm device
+ */
+ struct drm_i915_private *dev_priv;
+
+ /**
+ * @wakeref: As we keep the device awake while the perf stream is
+ * active, we track our runtime pm reference for later release.
+ */
+ intel_wakeref_t wakeref;
+
+ /**
+ * @engine: Engine associated with this performance stream.
+ */
+ struct intel_engine_cs *engine;
+
+ /**
+ * @sample_flags: Flags representing the `DRM_I915_PERF_PROP_SAMPLE_*`
+ * properties given when opening a stream, representing the contents
+ * of a single sample as read() by userspace.
+ */
+ u32 sample_flags;
+
+ /**
+ * @sample_size: Considering the configured contents of a sample
+ * combined with the required header size, this is the total size
+ * of a single sample record.
+ */
+ int sample_size;
+
+ /**
+ * @ctx: %NULL if measuring system-wide across all contexts or a
+ * specific context that is being monitored.
+ */
+ struct i915_gem_context *ctx;
+
+ /**
+ * @enabled: Whether the stream is currently enabled, considering
+ * whether the stream was opened in a disabled state and based
+ * on `I915_PERF_IOCTL_ENABLE` and `I915_PERF_IOCTL_DISABLE` calls.
+ */
+ bool enabled;
+
+ /**
+ * @ops: The callbacks providing the implementation of this specific
+ * type of configured stream.
+ */
+ const struct i915_perf_stream_ops *ops;
+
+ /**
+ * @oa_config: The OA configuration used by the stream.
+ */
+ struct i915_oa_config *oa_config;
+
+ /**
+ * @pinned_ctx: The OA context specific information.
+ *
+ * NOTE(review): this comment appears intended to also cover
+ * specific_ctx_id and specific_ctx_id_mask below - confirm.
+ */
+ struct intel_context *pinned_ctx;
+ u32 specific_ctx_id;
+ u32 specific_ctx_id_mask;
+
+ struct hrtimer poll_check_timer; /* NOTE(review): presumably the hrtimer whose callbacks ptr_lock's comment refers to - confirm */
+ wait_queue_head_t poll_wq; /* NOTE(review): presumably the wait queue handed to poll_wait() - confirm */
+ bool pollin;
+
+ bool periodic;
+ int period_exponent;
+
+ /**
+ * @oa_buffer: State of the OA buffer.
+ */
+ struct {
+ struct i915_vma *vma;
+ u8 *vaddr;
+ u32 last_ctx_id;
+ int format;
+ int format_size;
+ int size_exponent;
+
+ /**
+ * @oa_buffer.ptr_lock: Locks reads and writes to all
+ * head/tail state
+ *
+ * Consider: the head and tail pointer state needs to be read
+ * consistently from a hrtimer callback (atomic context) and
+ * read() fop (user context) with tail pointer updates happening
+ * in atomic context and head updates in user context and the
+ * (unlikely) possibility of read() errors needing to reset all
+ * head/tail state.
+ *
+ * Note: Contention/performance aren't currently a significant
+ * concern here considering the relatively low frequency of
+ * hrtimer callbacks (5ms period) and that reads typically only
+ * happen in response to a hrtimer event and likely complete
+ * before the next callback.
+ *
+ * Note: This lock is not held *while* reading and copying data
+ * to userspace so the value of head observed in hrtimer
+ * callbacks won't represent any partial consumption of data.
+ */
+ spinlock_t ptr_lock;
+
+ /**
+ * @oa_buffer.tails: One 'aging' tail pointer and one 'aged'
+ * tail pointer ready to be used for reading.
+ *
+ * Initial values of 0xffffffff are invalid and imply that an
+ * update is required (and should be ignored by an attempted
+ * read)
+ */
+ struct {
+ u32 offset;
+ } tails[2];
+
+ /**
+ * @oa_buffer.aged_tail_idx: Index for the aged tail ready to
+ * read() data up to.
+ */
+ unsigned int aged_tail_idx;
+
+ /**
+ * @oa_buffer.aging_timestamp: A monotonic timestamp for when
+ * the current aging tail pointer was read; used to determine
+ * when it is old enough to trust.
+ */
+ u64 aging_timestamp;
+
+ /**
+ * @oa_buffer.head: Although we can always read back the head
+ * pointer register, we prefer to avoid trusting the HW state,
+ * just to avoid any risk that some hardware condition could
+ * somehow bump the head pointer unpredictably and cause us to
+ * forward the wrong OA buffer data to userspace.
+ */
+ u32 head;
+ } oa_buffer;
+};
+
+/**
+ * struct i915_oa_ops - Gen specific implementation of an OA unit stream
+ *
+ * The three is_valid_*_reg() hooks take the device and a raw u32 register
+ * address; every other hook operates on a &struct i915_perf_stream.
+ */
+struct i915_oa_ops {
+ /**
+ * @is_valid_b_counter_reg: Validates register's address for
+ * programming boolean counters for a particular platform.
+ */
+ bool (*is_valid_b_counter_reg)(struct drm_i915_private *dev_priv,
+ u32 addr);
+
+ /**
+ * @is_valid_mux_reg: Validates register's address for programming mux
+ * for a particular platform.
+ */
+ bool (*is_valid_mux_reg)(struct drm_i915_private *dev_priv, u32 addr);
+
+ /**
+ * @is_valid_flex_reg: Validates register's address for programming
+ * flex EU filtering for a particular platform.
+ */
+ bool (*is_valid_flex_reg)(struct drm_i915_private *dev_priv, u32 addr);
+
+ /**
+ * @enable_metric_set: Selects and applies any MUX configuration to set
+ * up the Boolean and Custom (B/C) counters that are part of the
+ * counter reports being sampled. May apply system constraints such as
+ * disabling EU clock gating as required.
+ *
+ * Returns an int. NOTE(review): presumably 0 on success or a negative
+ * error code - confirm against the implementations.
+ */
+ int (*enable_metric_set)(struct i915_perf_stream *stream);
+
+ /**
+ * @disable_metric_set: Remove system constraints associated with using
+ * the OA unit.
+ */
+ void (*disable_metric_set)(struct i915_perf_stream *stream);
+
+ /**
+ * @oa_enable: Enable periodic sampling
+ */
+ void (*oa_enable)(struct i915_perf_stream *stream);
+
+ /**
+ * @oa_disable: Disable periodic sampling
+ */
+ void (*oa_disable)(struct i915_perf_stream *stream);
+
+ /**
+ * @read: Copy data from the circular OA buffer into a given userspace
+ * buffer.
+ *
+ * Takes the same arguments as &i915_perf_stream_ops.read.
+ */
+ int (*read)(struct i915_perf_stream *stream,
+ char __user *buf,
+ size_t count,
+ size_t *offset);
+
+ /**
+ * @oa_hw_tail_read: read the OA tail pointer register
+ *
+ * In particular this enables us to share all the fiddly code for
+ * handling the OA unit tail pointer race that affects multiple
+ * generations.
+ */
+ u32 (*oa_hw_tail_read)(struct i915_perf_stream *stream);
+};
+
+#endif /* _I915_PERF_TYPES_H_ */
Move the perf types out of i915_drv.h into their own header, i915_perf_types.h, following a pattern used throughout the driver.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h        | 300 +----------------------
 drivers/gpu/drm/i915/i915_perf.h       |   2 +
 drivers/gpu/drm/i915/i915_perf_types.h | 318 +++++++++++++++++++++++++
 3 files changed, 321 insertions(+), 299 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/i915_perf_types.h