@@ -1684,6 +1684,7 @@ struct i915_gen_pmu_node {
u32 ctx_id;
u32 ring;
u32 pid;
+ u32 tag;
};
extern const struct i915_oa_reg i915_oa_3d_mux_config_hsw[];
@@ -2015,6 +2016,7 @@ struct drm_i915_private {
struct work_struct work_event_destroy;
#define I915_GEN_PMU_SAMPLE_RING (1<<0)
#define I915_GEN_PMU_SAMPLE_PID (1<<1)
+#define I915_GEN_PMU_SAMPLE_TAG (1<<2)
int sample_info_flags;
} gen_pmu;
@@ -126,6 +126,8 @@ void i915_gen_insert_cmd_ts(struct intel_ringbuffer *ringbuf, u32 ctx_id,
entry->ring = ring_id_mask(ring);
if (dev_priv->gen_pmu.sample_info_flags & I915_GEN_PMU_SAMPLE_PID)
entry->pid = current->pid;
+ if (dev_priv->gen_pmu.sample_info_flags & I915_GEN_PMU_SAMPLE_TAG)
+ entry->tag = tag;
i915_gem_request_assign(&entry->req, ring->outstanding_lazy_request);
spin_lock_irqsave(&dev_priv->gen_pmu.lock, lock_flags);
@@ -559,6 +561,7 @@ static void forward_one_gen_pmu_sample(struct drm_i915_private *dev_priv,
struct drm_i915_ts_node_ctx_id *ctx_info;
struct drm_i915_ts_node_ring_id *ring_info;
struct drm_i915_ts_node_pid *pid_info;
+ struct drm_i915_ts_node_tag *tag_info;
struct perf_raw_record raw;
ts_size = sizeof(struct drm_i915_ts_data);
@@ -583,6 +586,13 @@ static void forward_one_gen_pmu_sample(struct drm_i915_private *dev_priv,
current_ptr = snapshot + snapshot_size;
}
+ if (dev_priv->gen_pmu.sample_info_flags & I915_GEN_PMU_SAMPLE_TAG) {
+ tag_info = (struct drm_i915_ts_node_tag *)current_ptr;
+ tag_info->tag = node->tag;
+ snapshot_size += sizeof(*tag_info);
+ current_ptr = snapshot + snapshot_size;
+ }
+
perf_sample_data_init(&data, 0, event->hw.last_period);
/* Note: the combined u32 raw->size member + raw data itself must be 8
@@ -1040,6 +1050,9 @@ static int init_gen_pmu_buffer(struct perf_event *event)
if (dev_priv->gen_pmu.sample_info_flags & I915_GEN_PMU_SAMPLE_PID)
node_size += sizeof(struct drm_i915_ts_node_pid);
+ if (dev_priv->gen_pmu.sample_info_flags & I915_GEN_PMU_SAMPLE_TAG)
+ node_size += sizeof(struct drm_i915_ts_node_tag);
+
/* size has to be aligned to 8 bytes (required by relevant gpu cmds) */
node_size = ALIGN(node_size, 8);
dev_priv->gen_pmu.buffer.node_size = node_size;
@@ -1658,6 +1671,9 @@ static int i915_gen_event_init(struct perf_event *event)
if (gen_attr.sample_pid)
dev_priv->gen_pmu.sample_info_flags |= I915_GEN_PMU_SAMPLE_PID;
+ if (gen_attr.sample_tag)
+ dev_priv->gen_pmu.sample_info_flags |= I915_GEN_PMU_SAMPLE_TAG;
+
/* To avoid the complexity of having to accurately filter
* data and marshal to the appropriate client
* we currently only allow exclusive access */
@@ -104,7 +104,8 @@ struct drm_i915_gen_pmu_attr {
__u32 size;
__u32 sample_ring:1,
sample_pid:1,
- __reserved_1:30;
+ sample_tag:1,
+ __reserved_1:29;
};
/* Header for PERF_RECORD_DEVICE type events */
@@ -169,6 +170,11 @@ struct drm_i915_ts_node_pid {
__u32 pad;
};
+struct drm_i915_ts_node_tag { /* optional per-sample section, appended to a forwarded timestamp sample when I915_GEN_PMU_SAMPLE_TAG is set */
+ __u32 tag; /* copied from the node's tag field when the sample is forwarded */
+ __u32 pad; /* NOTE(review): presumably pads the section to 8 bytes; not written by the visible forwarding code - confirm */
+};
+
/* Each region is a minimum of 16k, and there are at most 255 of them.
*/
#define I915_NR_TEX_REGIONS 255 /* table size 2k - maximum due to use