@@ -2021,6 +2021,7 @@ struct drm_i915_private {
u32 head;
u32 tail;
} buffer;
+ struct work_struct work_timer;
} gen_pmu;
struct list_head profile_cmd;
@@ -224,11 +224,121 @@ void forward_oa_async_snapshots_work(struct work_struct *__work)
mutex_unlock(&dev_priv->dev->struct_mutex);
}
+/*
+ * Initialize the queue header that lives at the start of the gen PMU
+ * buffer: record the total buffer size, compute the 8-byte-aligned offset
+ * of the first timestamp node, and zero the node/wrap counters so the
+ * ring starts out empty.
+ */
+static void init_gen_pmu_buf_queue(struct drm_i915_private *dev_priv)
+{
+ struct drm_i915_ts_queue_header *hdr =
+ (struct drm_i915_ts_queue_header *)
+ dev_priv->gen_pmu.buffer.addr;
+ void *data_ptr;
+
+ hdr->size_in_bytes = dev_priv->gen_pmu.buffer.obj->base.size;
+ /* 8 byte alignment for node address */
+ data_ptr = PTR_ALIGN((void *)(hdr + 1), 8);
+ /* Offset of the first node, relative to the header itself. */
+ hdr->data_offset = (__u64)(data_ptr - (void *)hdr);
+
+ hdr->node_count = 0;
+ hdr->wrap_count = 0;
+}
+
+/*
+ * Wrap a single completed timestamp node in a perf raw record and deliver
+ * it to the exclusive gen PMU event via the perf overflow path.
+ */
+static void forward_one_gen_pmu_sample(struct drm_i915_private *dev_priv,
+ struct drm_i915_ts_node *node)
+{
+ struct perf_sample_data data;
+ struct perf_event *event = dev_priv->gen_pmu.exclusive_event;
+ int snapshot_size = sizeof(struct drm_i915_ts_usernode);
+ struct perf_raw_record raw;
+
+ perf_sample_data_init(&data, 0, event->hw.last_period);
+
+ /* Note: the combined u32 raw->size member + raw data itself must be 8
+ * byte aligned. The + 4 accounts for the u32 size field preceding the
+ * payload in the perf ring buffer. */
+ raw.size = snapshot_size + 4;
+ raw.data = node;
+
+ data.raw = &raw;
+
+ /* dummy_regs: no meaningful CPU register state for a GPU-originated
+ * sample. */
+ perf_event_overflow(event, &data, &dev_priv->gen_pmu.dummy_regs);
+}
+
+/*
+ * Synchronously drain outstanding GPU work for the gen PMU ring: for every
+ * node between the software head and the hardware tail (hdr->node_count),
+ * wait for the GEM request that writes that node to complete, then drop
+ * the node's request reference.
+ *
+ * NOTE(review): callers appear to be expected to hold dev->struct_mutex
+ * (see i915_gen_event_flush, which takes it around this call) — confirm.
+ * buffer.head is deliberately not written back here; head advancement and
+ * sample forwarding happen in forward_gen_pmu_snapshots_work.
+ */
+void i915_gen_pmu_wait_gpu(struct drm_i915_private *dev_priv)
+{
+ struct drm_i915_ts_queue_header *hdr =
+ (struct drm_i915_ts_queue_header *)
+ dev_priv->gen_pmu.buffer.addr;
+ struct drm_i915_ts_node *first_node, *node;
+ int head, tail, num_nodes, ret;
+ struct drm_i915_gem_request *req;
+
+ first_node = (struct drm_i915_ts_node *)
+ ((char *)hdr + hdr->data_offset);
+ num_nodes = (hdr->size_in_bytes - hdr->data_offset) /
+ sizeof(*node);
+
+ tail = hdr->node_count;
+ head = dev_priv->gen_pmu.buffer.head;
+
+ /* wait for all requests to complete*/
+ while ((head % num_nodes) != (tail % num_nodes)) {
+ node = &first_node[head % num_nodes];
+ req = node->node_info.req;
+ if (req) {
+ if (!i915_gem_request_completed(req, true)) {
+ ret = i915_wait_request(req);
+ if (ret)
+ DRM_DEBUG_DRIVER(
+ "gen pmu: failed to wait\n");
+ }
+ /* Drop our reference now that the GPU is done. */
+ i915_gem_request_assign(&node->node_info.req, NULL);
+ }
+ head++;
+ }
+}
+
+/*
+ * Work handler: forward completed timestamp snapshots to perf, in order.
+ * Walks the ring from the software head towards the hardware tail
+ * (hdr->node_count) and stops at the first node whose GPU request has not
+ * yet completed, so samples are always delivered in submission order.
+ */
+void forward_gen_pmu_snapshots_work(struct work_struct *__work)
+{
+ struct drm_i915_private *dev_priv =
+ container_of(__work, typeof(*dev_priv),
+ gen_pmu.work_timer);
+ struct drm_i915_ts_queue_header *hdr =
+ (struct drm_i915_ts_queue_header *)
+ dev_priv->gen_pmu.buffer.addr;
+ struct drm_i915_ts_node *first_node, *node;
+ int head, tail, num_nodes, ret;
+ struct drm_i915_gem_request *req;
+
+ first_node = (struct drm_i915_ts_node *)
+ ((char *)hdr + hdr->data_offset);
+ num_nodes = (hdr->size_in_bytes - hdr->data_offset) /
+ sizeof(*node);
+
+ ret = i915_mutex_lock_interruptible(dev_priv->dev);
+ if (ret)
+ /* NOTE(review): on interrupted lock the work exits without
+ * rescheduling itself; pending snapshots are picked up on
+ * the next flush — confirm this is acceptable. */
+ return;
+
+ tail = hdr->node_count;
+ head = dev_priv->gen_pmu.buffer.head;
+
+ while ((head % num_nodes) != (tail % num_nodes)) {
+ node = &first_node[head % num_nodes];
+ req = node->node_info.req;
+ if (req && i915_gem_request_completed(req, true)) {
+ forward_one_gen_pmu_sample(dev_priv, node);
+ i915_gem_request_assign(&node->node_info.req, NULL);
+ head++;
+ } else
+ /* Preserve ordering: do not skip past an
+ * incomplete snapshot. */
+ break;
+ }
+
+ dev_priv->gen_pmu.buffer.tail = tail;
+ dev_priv->gen_pmu.buffer.head = head;
+
+ mutex_unlock(&dev_priv->dev->struct_mutex);
+}
+
static void gen_pmu_flush_snapshots(struct drm_i915_private *dev_priv)
{
WARN_ON(!dev_priv->gen_pmu.buffer.addr);
-
- /* TODO: routine for forwarding snapshots to userspace */
+ /* NOTE(review): defer forwarding to process context via the work
+ * item — the handler takes struct_mutex, which presumably cannot
+ * be taken from the sampling hrtimer path; confirm caller context. */
+ schedule_work(&dev_priv->gen_pmu.work_timer);
}
static void forward_one_oa_snapshot_to_event(struct drm_i915_private *dev_priv,
@@ -652,6 +762,7 @@ static int init_gen_pmu_buffer(struct perf_event *event)
dev_priv->gen_pmu.buffer.obj = bo;
dev_priv->gen_pmu.buffer.addr = vmap_oa_buffer(bo);
+ init_gen_pmu_buf_queue(dev_priv);
DRM_DEBUG_DRIVER("Gen PMU Buffer initialized, vaddr = %p",
dev_priv->gen_pmu.buffer.addr);
@@ -1327,6 +1438,13 @@ static void i915_gen_event_flush(struct perf_event *event)
{
struct drm_i915_private *i915 =
container_of(event->pmu, typeof(*i915), gen_pmu.pmu);
+ int ret;
+
+ ret = i915_mutex_lock_interruptible(i915->dev);
+ if (ret)
+ return;
+ i915_gen_pmu_wait_gpu(i915);
+ mutex_unlock(&i915->dev->struct_mutex);
gen_pmu_flush_snapshots(i915);
}
@@ -1476,6 +1594,7 @@ void i915_gen_pmu_register(struct drm_device *dev)
hrtimer_init(&i915->gen_pmu.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
i915->gen_pmu.timer.function = hrtimer_sample_gen;
+ INIT_WORK(&i915->gen_pmu.work_timer, forward_gen_pmu_snapshots_work);
spin_lock_init(&i915->gen_pmu.lock);
i915->gen_pmu.pmu.capabilities = PERF_PMU_CAP_IS_DEVICE;
@@ -1505,6 +1624,8 @@ void i915_gen_pmu_unregister(struct drm_device *dev)
if (i915->gen_pmu.pmu.event_init == NULL)
return;
+ cancel_work_sync(&i915->gen_pmu.work_timer);
+
perf_pmu_unregister(&i915->gen_pmu.pmu);
i915->gen_pmu.pmu.event_init = NULL;
}