@@ -1679,6 +1679,7 @@ struct i915_gen_pmu_node {
struct list_head head;
struct drm_i915_gem_request *req;
u32 offset;
+ bool discard;
u32 ctx_id;
};
@@ -2008,6 +2009,7 @@ struct drm_i915_private {
} buffer;
struct list_head node_list;
struct work_struct work_timer;
+ struct work_struct work_event_destroy;
} gen_pmu;
void (*insert_profile_cmd[I915_PROFILE_MAX])
@@ -441,6 +441,36 @@ int i915_gen_pmu_wait_gpu(struct drm_i915_private *dev_priv)
return 0;
}
+void i915_gen_pmu_release_request_ref(struct drm_i915_private *dev_priv)
+{
+ struct i915_gen_pmu_node *entry, *next;
+ struct drm_i915_gem_request *req;
+ unsigned long lock_flags;
+ int ret;
+
+ list_for_each_entry_safe
+ (entry, next, &dev_priv->gen_pmu.node_list, head) {
+ req = entry->req;
+ if (req) {
+ ret = i915_mutex_lock_interruptible(dev_priv->dev);
+ if (ret)
+ break;
+ i915_gem_request_assign(&entry->req, NULL);
+ mutex_unlock(&dev_priv->dev->struct_mutex);
+ }
+
+ /*
+ * NOTE(review): this assumes we never run concurrently with the
+ * forward-snapshots work fn (the only other place entries are
+ * deleted) — confirm, or hold gen_pmu.lock across the whole loop.
+ */
+ spin_lock_irqsave(&dev_priv->gen_pmu.lock, lock_flags);
+ list_del(&entry->head);
+ spin_unlock_irqrestore(&dev_priv->gen_pmu.lock, lock_flags);
+ kfree(entry);
+ }
+}
+
static void forward_one_gen_pmu_sample(struct drm_i915_private *dev_priv,
struct i915_gen_pmu_node *node)
{
@@ -479,11 +509,19 @@ void forward_gen_pmu_snapshots_work(struct work_struct *__work)
unsigned long lock_flags;
int ret;
+ spin_lock_irqsave(&dev_priv->gen_pmu.lock, lock_flags);
+ if (dev_priv->gen_pmu.event_active == false) {
+ spin_unlock_irqrestore(&dev_priv->gen_pmu.lock, lock_flags);
+ return;
+ }
+ spin_unlock_irqrestore(&dev_priv->gen_pmu.lock, lock_flags);
+
list_for_each_entry_safe
(entry, next, &dev_priv->gen_pmu.node_list, head) {
req = entry->req;
if (req && i915_gem_request_completed(req, true)) {
- forward_one_gen_pmu_sample(dev_priv, entry);
+ if (!entry->discard)
+ forward_one_gen_pmu_sample(dev_priv, entry);
ret = i915_mutex_lock_interruptible(dev_priv->dev);
if (ret)
break;
@@ -667,18 +705,11 @@ out:
static void gen_buffer_destroy(struct drm_i915_private *i915)
{
- unsigned long lock_flags;
-
mutex_lock(&i915->dev->struct_mutex);
vunmap(i915->gen_pmu.buffer.addr);
i915_gem_object_ggtt_unpin(i915->gen_pmu.buffer.obj);
drm_gem_object_unreference(&i915->gen_pmu.buffer.obj->base);
mutex_unlock(&i915->dev->struct_mutex);
-
- spin_lock_irqsave(&i915->gen_pmu.lock, lock_flags);
- i915->gen_pmu.buffer.obj = NULL;
- i915->gen_pmu.buffer.addr = NULL;
- spin_unlock_irqrestore(&i915->gen_pmu.lock, lock_flags);
}
static void i915_gen_event_destroy(struct perf_event *event)
@@ -688,10 +719,44 @@ static void i915_gen_event_destroy(struct perf_event *event)
WARN_ON(event->parent);
- gen_buffer_destroy(i915);
+ cancel_work_sync(&i915->gen_pmu.work_timer);
BUG_ON(i915->gen_pmu.exclusive_event != event);
i915->gen_pmu.exclusive_event = NULL;
+
+ /* We can dereference our local copy of the dest buffer here,
+ * since an active reference to the buffer is taken when the
+ * commands are inserted, so the buffer is actually freed only
+ * once the GPU is done with it.
+ */
+ gen_buffer_destroy(i915);
+
+ schedule_work(&i915->gen_pmu.work_event_destroy);
+}
+
+void i915_gen_pmu_event_destroy_work(struct work_struct *__work)
+{
+ struct drm_i915_private *dev_priv =
+ container_of(__work, typeof(*dev_priv),
+ gen_pmu.work_event_destroy);
+ unsigned long lock_flags;
+ int ret;
+
+ ret = i915_gen_pmu_wait_gpu(dev_priv);
+ if (ret)
+ goto out;
+
+ i915_gen_pmu_release_request_ref(dev_priv);
+
+out:
+ /*
+ * Done last: keeping buffer.obj set excludes a new event (via the
+ * -EBUSY check in event_init) until the old one is fully processed.
+ */
+ spin_lock_irqsave(&dev_priv->gen_pmu.lock, lock_flags);
+ dev_priv->gen_pmu.buffer.obj = NULL;
+ dev_priv->gen_pmu.buffer.addr = NULL;
+ spin_unlock_irqrestore(&dev_priv->gen_pmu.lock, lock_flags);
}
static int alloc_obj(struct drm_i915_private *dev_priv,
@@ -1412,6 +1477,7 @@ static int i915_gen_event_init(struct perf_event *event)
{
struct drm_i915_private *dev_priv =
container_of(event->pmu, typeof(*dev_priv), gen_pmu.pmu);
+ unsigned long lock_flags;
int ret = 0;
if (event->attr.type != event->pmu->type)
@@ -1420,8 +1486,12 @@ static int i915_gen_event_init(struct perf_event *event)
/* To avoid the complexity of having to accurately filter
* data and marshal to the appropriate client
* we currently only allow exclusive access */
- if (dev_priv->gen_pmu.buffer.obj)
+ spin_lock_irqsave(&dev_priv->gen_pmu.lock, lock_flags);
+ if (dev_priv->gen_pmu.buffer.obj) {
+ spin_unlock_irqrestore(&dev_priv->gen_pmu.lock, lock_flags);
return -EBUSY;
+ }
+ spin_unlock_irqrestore(&dev_priv->gen_pmu.lock, lock_flags);
/*
* We need to check for CAP_SYS_ADMIN capability as we profile all
@@ -1464,16 +1534,18 @@ static void i915_gen_event_stop(struct perf_event *event, int flags)
{
struct drm_i915_private *dev_priv =
container_of(event->pmu, typeof(*dev_priv), gen_pmu.pmu);
+ struct i915_gen_pmu_node *entry;
unsigned long lock_flags;
+ hrtimer_cancel(&dev_priv->gen_pmu.timer);
+ gen_pmu_flush_snapshots(dev_priv);
+
spin_lock_irqsave(&dev_priv->gen_pmu.lock, lock_flags);
dev_priv->gen_pmu.event_active = false;
-
+ list_for_each_entry(entry, &dev_priv->gen_pmu.node_list, head)
+ entry->discard = true;
spin_unlock_irqrestore(&dev_priv->gen_pmu.lock, lock_flags);
- hrtimer_cancel(&dev_priv->gen_pmu.timer);
- gen_pmu_flush_snapshots(dev_priv);
-
event->hw.state = PERF_HES_STOPPED;
}
@@ -1660,6 +1732,9 @@ void i915_gen_pmu_register(struct drm_device *dev)
i915->gen_pmu.timer.function = hrtimer_sample_gen;
INIT_WORK(&i915->gen_pmu.work_timer, forward_gen_pmu_snapshots_work);
+ INIT_WORK(&i915->gen_pmu.work_event_destroy,
+ i915_gen_pmu_event_destroy_work);
+
spin_lock_init(&i915->gen_pmu.lock);
i915->gen_pmu.pmu.capabilities = PERF_PMU_CAP_IS_DEVICE;
@@ -1690,6 +1765,7 @@ void i915_gen_pmu_unregister(struct drm_device *dev)
return;
cancel_work_sync(&i915->gen_pmu.work_timer);
+ cancel_work_sync(&i915->gen_pmu.work_event_destroy);
perf_pmu_unregister(&i915->gen_pmu.pmu);
i915->gen_pmu.pmu.event_init = NULL;