@@ -463,6 +463,7 @@ static u32 gen7_oa_hw_tail_read(struct i915_perf_stream *stream)
*/
static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream)
{
+ u64 half_full_count = atomic64_read(&stream->half_full_count);
u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
int report_size = stream->oa_buffer.format_size;
unsigned long flags;
@@ -539,6 +540,8 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream)
pollin = OA_TAKEN(stream->oa_buffer.tail - gtt_offset,
stream->oa_buffer.head - gtt_offset) >= report_size;
+ stream->half_full_count_last = half_full_count;
+
spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
return pollin;
@@ -556,16 +559,23 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream)
* waiting on an event to occur. These checks are redundant when hrtimer events
* will call oa_buffer_check_unlocked to update the oa_buffer tail pointers. The
* redundant checks add cpu overhead. We simplify the check to reduce cpu
- * overhead.
+ * overhead. For interrupt events, we still need to make sure that
+ * oa_buffer_check_unlocked is called when an interrupt occurs.
*/
static bool oa_buffer_check_reports(struct i915_perf_stream *stream)
{
unsigned long flags;
bool available;
+ /* A new half-full interrupt means the tail must be re-evaluated. */
+ if (stream->oa_interrupt_monitor &&
+ stream->half_full_count_last !=
+ atomic64_read(&stream->half_full_count))
+ return oa_buffer_check_unlocked(stream);
+
spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
available = stream->oa_buffer.tail != stream->oa_buffer.head;
spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
return available;
}
@@ -1163,8 +1173,8 @@ static int gen7_oa_read(struct i915_perf_stream *stream,
* @stream: An i915-perf stream opened for OA metrics
*
* Called when userspace tries to read() from a blocking stream FD opened
- * for OA metrics. It waits until the hrtimer callback finds a non-empty
- * OA buffer and wakes us.
+ * for OA metrics. It waits until either the hrtimer callback finds a non-empty
+ * OA buffer or the OA interrupt kicks in and wakes us.
*
* Note: it's acceptable to have this return with some false positives
* since any subsequent read handling will return -EAGAIN if there isn't
@@ -2696,7 +2706,7 @@ static void i915_oa_stream_enable(struct i915_perf_stream *stream)
stream->perf->ops.oa_enable(stream);
- if (stream->periodic)
+ if (stream->periodic && stream->poll_oa_period)
hrtimer_start(&stream->poll_check_timer,
ns_to_ktime(stream->poll_oa_period),
HRTIMER_MODE_REL_PINNED);
@@ -2770,6 +2780,10 @@ static void i915_oa_stream_disable(struct i915_perf_stream *stream)
{
stream->perf->ops.oa_disable(stream);
+ stream->half_full_count_last = 0;
+ atomic64_set(&stream->half_full_count,
+ stream->half_full_count_last);
+
if (stream->periodic)
hrtimer_cancel(&stream->poll_check_timer);
}
@@ -3137,6 +3151,16 @@ static __poll_t i915_perf_poll_locked(struct i915_perf_stream *stream,
stream->ops->poll_wait(stream, file, wait);
+ /*
+ * Only check the half buffer full notifications if requested by the
+ * user.
+ */
+ if (stream->oa_interrupt_monitor &&
+ (stream->half_full_count_last !=
+ atomic64_read(&stream->half_full_count))) {
+ stream->pollin = oa_buffer_check_unlocked(stream);
+ }
+
/* Note: we don't explicitly check whether there's something to read
* here since this path may be very hot depending on what else
* userspace is polling, or on the timeout in use. We rely solely on
@@ -3554,6 +3578,7 @@ static u64 oa_exponent_to_ns(struct i915_perf *perf, int exponent)
/**
* read_properties_unlocked - validate + copy userspace stream open properties
* @perf: i915 perf instance
+ * @open_flags: Flags set by userspace for the opening of the stream
* @uprops: The array of u64 key value pairs given by userspace
* @n_props: The number of key value pairs expected in @uprops
* @props: The stream configuration built up while validating properties
@@ -3567,6 +3592,7 @@ static u64 oa_exponent_to_ns(struct i915_perf *perf, int exponent)
* rule out defining new properties with ordering requirements in the future.
*/
static int read_properties_unlocked(struct i915_perf *perf,
+ u32 open_flags,
u64 __user *uprops,
u32 n_props,
struct perf_open_properties *props)
@@ -3710,13 +3736,16 @@ static int read_properties_unlocked(struct i915_perf *perf,
break;
}
case DRM_I915_PERF_PROP_POLL_OA_PERIOD:
- if (value < 100000 /* 100us */) {
+ if (value > 0 && value < 100000 /* 100us */) {
DRM_DEBUG("OA availability timer too small (%lluns < 100us)\n",
value);
return -EINVAL;
}
props->poll_oa_period = value;
break;
+ case DRM_I915_PERF_PROP_OA_ENABLE_INTERRUPT:
+ props->oa_interrupt_monitor = value != 0;
+ break;
case DRM_I915_PERF_PROP_MAX:
MISSING_CASE(id);
return -EINVAL;
@@ -3725,6 +3754,19 @@ static int read_properties_unlocked(struct i915_perf *perf,
uprop += 2;
}
+ /*
+ * Blocking reads need to be woken up by some mechanism. If no polling
+ * of the HEAD/TAIL register is done by the kernel and no interrupt is
+ * enabled, we'll never be able to wake up.
+ */
+ if ((open_flags & I915_PERF_FLAG_FD_NONBLOCK) == 0 &&
+ !props->poll_oa_period &&
+ !props->oa_interrupt_monitor) {
+ DRM_DEBUG("Requesting a blocking stream with no polling period "
+ "& no interrupt.\n");
+ return -EINVAL;
+ }
+
return 0;
}
@@ -3775,6 +3817,7 @@ int i915_perf_open_ioctl(struct drm_device *dev, void *data,
}
ret = read_properties_unlocked(perf,
+ param->flags,
u64_to_user_ptr(param->properties_ptr),
param->num_properties,
&props);
@@ -4502,8 +4545,11 @@ int i915_perf_ioctl_version(void)
*
* 5: Add DRM_I915_PERF_PROP_POLL_OA_PERIOD parameter that controls the
* interval for the hrtimer used to check for OA data.
+ *
+ * 6: Add DRM_I915_PERF_PROP_OA_ENABLE_INTERRUPT parameter to
+ * enable/disable interrupts in OA.
*/
- return 5;
+ return 6;
}
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
@@ -1987,12 +1987,23 @@ enum drm_i915_perf_property_id {
* the driver if this parameter is not specified. Note that larger timer
* values will reduce cpu consumption during OA perf captures. However,
* excessively large values would potentially result in OA buffer
- * overwrites as captures reach end of the OA buffer.
+ * overwrites as captures reach end of the OA buffer. A value of 0 means
+ * no hrtimer will be started.
*
* This property is available in perf revision 5.
*/
DRM_I915_PERF_PROP_POLL_OA_PERIOD,
+ /**
+ * Specifying this property sets up the interrupt mechanism for the OA
+ * buffer in i915. This option in conjunction with a long polling period
+ * for availability of OA data can reduce CPU load significantly if you
+ * do not care about OA data being read as soon as it's available.
+ *
+ * This property is available in perf revision 6.
+ */
+ DRM_I915_PERF_PROP_OA_ENABLE_INTERRUPT,
+
DRM_I915_PERF_PROP_MAX /* non-ABI */
};