Message ID | 20160914153256.4121-1-robert@sixbynine.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 14 September 2016 at 16:32, Robert Bragg <robert@sixbynine.org> wrote: > Adds base i915 perf infrastructure for Gen performance metrics. > > This adds a DRM_IOCTL_I915_PERF_OPEN ioctl that takes an array of uint64 > properties to configure a stream of metrics and returns a new fd usable > with standard VFS system calls including read() to read typed and sized > records; ioctl() to enable or disable capture and poll() to wait for > data. > > A stream is opened something like: > > uint64_t properties[] = { > /* Single context sampling */ > DRM_I915_PERF_PROP_CTX_HANDLE, ctx_handle, > > /* Include OA reports in samples */ > DRM_I915_PERF_PROP_SAMPLE_OA, true, > > /* OA unit configuration */ > DRM_I915_PERF_PROP_OA_METRICS_SET, metrics_set_id, > DRM_I915_PERF_PROP_OA_FORMAT, report_format, > DRM_I915_PERF_PROP_OA_EXPONENT, period_exponent, > }; > struct drm_i915_perf_open_param parm = { > .flags = I915_PERF_FLAG_FD_CLOEXEC | > I915_PERF_FLAG_FD_NONBLOCK | > I915_PERF_FLAG_DISABLED, > .properties_ptr = (uint64_t)properties, > .num_properties = sizeof(properties) / 16, > }; > int fd = drmIoctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, ¶m); > > Records read all start with a common { type, size } header with > DRM_I915_PERF_RECORD_SAMPLE being of most interest. Sample records > contain an extensible number of fields and it's the > DRM_I915_PERF_PROP_SAMPLE_xyz properties given when opening that > determine what's included in every sample. > > No specific streams are supported yet so any attempt to open a stream > will return an error. > > v4: > s/DRM_IORW/DRM_IOR/ - Emil Velikov > v3: > update read() interface to avoid passing state struct - Chris Wilson > fix some rebase fallout, with i915-perf init/deinit > v2: > use i915_gem_context_get() - Chris Wilson > > Signed-off-by: Robert Bragg <robert@sixbynine.org> > --- > drivers/gpu/drm/i915/Makefile | 3 + > drivers/gpu/drm/i915/i915_drv.c | 4 + > drivers/gpu/drm/i915/i915_drv.h | 91 ++++++++ > drivers/gpu/drm/i915/i915_perf.c | 448 +++++++++++++++++++++++++++++++++++++++ > include/uapi/drm/i915_drm.h | 67 ++++++ > 5 files changed, 613 insertions(+) > create mode 100644 drivers/gpu/drm/i915/i915_perf.c > > diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile > index a998c2b..d991781 100644 > --- a/drivers/gpu/drm/i915/Makefile > +++ b/drivers/gpu/drm/i915/Makefile > @@ -110,6 +110,9 @@ i915-y += dvo_ch7017.o \ > # virtual gpu code > i915-y += i915_vgpu.o > > +# perf code > +i915-y += i915_perf.o > + > ifeq ($(CONFIG_DRM_I915_GVT),y) > i915-y += intel_gvt.o > include $(src)/gvt/Makefile > diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c > index 7f4e8ad..14f22fc 100644 > --- a/drivers/gpu/drm/i915/i915_drv.c > +++ b/drivers/gpu/drm/i915/i915_drv.c > @@ -838,6 +838,8 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv, > > intel_device_info_dump(dev_priv); > > + i915_perf_init(dev_priv); > + > /* Not all pre-production machines fall into this category, only the > * very first ones. Almost everything should work, except for maybe > * suspend/resume. And we don't implement workarounds that affect only > @@ -859,6 +861,7 @@ err_workqueues: > */ > static void i915_driver_cleanup_early(struct drm_i915_private *dev_priv) > { > + i915_perf_fini(dev_priv); > i915_gem_load_cleanup(&dev_priv->drm); > i915_workqueues_cleanup(dev_priv); > } > @@ -2560,6 +2563,7 @@ static const struct drm_ioctl_desc i915_ioctls[] = { > DRM_IOCTL_DEF_DRV(I915_GEM_USERPTR, i915_gem_userptr_ioctl, DRM_RENDER_ALLOW), > DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_GETPARAM, i915_gem_context_getparam_ioctl, DRM_RENDER_ALLOW), > DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_SETPARAM, i915_gem_context_setparam_ioctl, DRM_RENDER_ALLOW), > + DRM_IOCTL_DEF_DRV(I915_PERF_OPEN, i915_perf_open_ioctl, DRM_RENDER_ALLOW), > }; > > static struct drm_driver driver = { > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index 1e2dda8..0f5cd8f 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -1740,6 +1740,84 @@ struct intel_wm_config { > bool sprites_scaled; > }; > > +struct i915_perf_stream; > + > +struct i915_perf_stream_ops { > + /* Enables the collection of HW samples, either in response to > + * I915_PERF_IOCTL_ENABLE or implicitly called when stream is > + * opened without I915_PERF_FLAG_DISABLED. > + */ > + void (*enable)(struct i915_perf_stream *stream); > + > + /* Disables the collection of HW samples, either in response to > + * I915_PERF_IOCTL_DISABLE or implicitly called before > + * destroying the stream. > + */ > + void (*disable)(struct i915_perf_stream *stream); > + > + /* Return: true if any i915 perf records are ready to read() > + * for this stream. > + */ > + bool (*can_read)(struct i915_perf_stream *stream); > + > + /* Call poll_wait, passing a wait queue that will be woken > + * once there is something ready to read() for the stream > + */ > + void (*poll_wait)(struct i915_perf_stream *stream, > + struct file *file, > + poll_table *wait); > + > + /* For handling a blocking read, wait until there is something > + * to ready to read() for the stream. E.g. wait on the same > + * wait queue that would be passed to poll_wait() until > + * ->can_read() returns true (if its safe to call ->can_read() > + * without the i915 perf lock held). > + */ > + int (*wait_unlocked)(struct i915_perf_stream *stream); > + > + /* read - Copy buffered metrics as records to userspace > + * @buf: the userspace, destination buffer > + * @count: the number of bytes to copy, requested by userspace > + * @offset: zero at the start of the read, updated as the read > + * proceeds, it represents how many bytes have been > + * copied so far and the buffer offset for copying the > + * next record. > + * > + * Copy as many buffered i915 perf samples and records for > + * this stream to userspace as will fit in the given buffer. > + * > + * Only write complete records; returning -ENOSPC if there > + * isn't room for a complete record. > + * > + * Return any error condition that results in a short read > + * such as -ENOSPC or -EFAULT, even though these may be > + * squashed before returning to userspace. > + */ > + int (*read)(struct i915_perf_stream *stream, > + char __user *buf, > + size_t count, > + size_t *offset); > + > + /* Cleanup any stream specific resources. > + * > + * The stream will always be disabled before this is called. > + */ > + void (*destroy)(struct i915_perf_stream *stream); > +}; > + > +struct i915_perf_stream { > + struct drm_i915_private *dev_priv; > + > + struct list_head link; > + > + u32 sample_flags; > + > + struct i915_gem_context *ctx; > + bool enabled; > + > + struct i915_perf_stream_ops *ops; > +}; > + > struct drm_i915_private { > struct drm_device drm; > > @@ -2040,6 +2118,12 @@ struct drm_i915_private { > > struct i915_runtime_pm pm; > > + struct { > + bool initialized; > + struct mutex lock; > + struct list_head streams; > + } perf; > + > /* Abstract the submission mechanism (legacy ringbuffer or execlists) away */ > struct { > void (*resume)(struct drm_i915_private *); > @@ -3445,6 +3529,9 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data, > int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, void *data, > struct drm_file *file); > > +int i915_perf_open_ioctl(struct drm_device *dev, void *data, > + struct drm_file *file); > + > /* i915_gem_evict.c */ > int __must_check i915_gem_evict_something(struct i915_address_space *vm, > u64 min_size, u64 alignment, > @@ -3555,6 +3642,10 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine, > u32 batch_len, > bool is_master); > > +/* i915_perf.c */ > +extern void i915_perf_init(struct drm_i915_private *dev_priv); > +extern void i915_perf_fini(struct drm_i915_private *dev_priv); > + > /* i915_suspend.c */ > extern int i915_save_state(struct drm_device *dev); > extern int i915_restore_state(struct drm_device *dev); > diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c > new file mode 100644 > index 0000000..87530f5 > --- /dev/null > +++ b/drivers/gpu/drm/i915/i915_perf.c > @@ -0,0 +1,448 @@ > +/* > + * Copyright © 2015-2016 Intel Corporation > + * > + * Permission is hereby granted, free of charge, to any person obtaining a > + * copy of this software and associated documentation files (the "Software"), > + * to deal in the Software without restriction, including without limitation > + * the rights to use, copy, modify, merge, publish, distribute, sublicense, > + * and/or sell copies of the Software, and to permit persons to whom the > + * Software is furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice (including the next > + * paragraph) shall be included in all copies or substantial portions of the > + * Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS > + * IN THE SOFTWARE. > + * > + * Authors: > + * Robert Bragg <robert@sixbynine.org> > + */ > + > +#include <linux/anon_inodes.h> > +#include <linux/sizes.h> This should really be introduced in a later patch, but meh. > + > +#include "i915_drv.h" > + > +struct perf_open_properties { > + u32 sample_flags; > + > + u64 single_context:1; > + u64 ctx_handle; > +}; > + > +static ssize_t i915_perf_read_locked(struct i915_perf_stream *stream, > + struct file *file, > + char __user *buf, > + size_t count, > + loff_t *ppos) > +{ > + /* Note we keep the offset (aka bytes read) separate from any > + * error status so that the final check for whether we return > + * the bytes read with a higher precedence than any error (see > + * comment below) doesn't need to be handled/duplicated in > + * stream->ops->read() implementations. > + */ > + size_t offset = 0; > + int ret = stream->ops->read(stream, buf, count, &offset); > + > + /* If we've successfully copied any data then reporting that > + * takes precedence over any internal error status, so the > + * data isn't lost. > + * > + * For example ret will be -ENOSPC whenever there is more > + * buffered data than can be copied to userspace, but that's > + * only interesting if we weren't able to copy some data > + * because it implies the userspace buffer is too small to > + * receive a single record (and we never split records). > + * > + * Another case with ret == -EFAULT is more of a grey area > + * since it would seem like bad form for userspace to ask us > + * to overrun its buffer, but the user knows best: > + * > + * http://yarchive.net/comp/linux/partial_reads_writes.html > + */ > + return offset ?: (ret ?: -EAGAIN); > +} > + > +static ssize_t i915_perf_read(struct file *file, > + char __user *buf, > + size_t count, > + loff_t *ppos) > +{ > + struct i915_perf_stream *stream = file->private_data; > + struct drm_i915_private *dev_priv = stream->dev_priv; > + ssize_t ret; > + > + if (!(file->f_flags & O_NONBLOCK)) { > + /* Allow false positives from stream->ops->wait_unlocked. > + */ > + do { > + ret = stream->ops->wait_unlocked(stream); > + if (ret) > + return ret; > + > + mutex_lock(&dev_priv->perf.lock); > + ret = i915_perf_read_locked(stream, file, > + buf, count, ppos); > + mutex_unlock(&dev_priv->perf.lock); > + } while (ret == -EAGAIN); > + } else { > + mutex_lock(&dev_priv->perf.lock); > + ret = i915_perf_read_locked(stream, file, buf, count, ppos); > + mutex_unlock(&dev_priv->perf.lock); > + } > + > + return ret; > +} > + > +static unsigned int i915_perf_poll_locked(struct i915_perf_stream *stream, > + struct file *file, > + poll_table *wait) > +{ > + unsigned int streams = 0; > + > + stream->ops->poll_wait(stream, file, wait); > + > + if (stream->ops->can_read(stream)) > + streams |= POLLIN; > + > + return streams; > +} > + > +static unsigned int i915_perf_poll(struct file *file, poll_table *wait) > +{ > + struct i915_perf_stream *stream = file->private_data; > + struct drm_i915_private *dev_priv = stream->dev_priv; > + int ret; > + > + mutex_lock(&dev_priv->perf.lock); > + ret = i915_perf_poll_locked(stream, file, wait); > + mutex_unlock(&dev_priv->perf.lock); > + > + return ret; > +} > + > +static void i915_perf_enable_locked(struct i915_perf_stream *stream) > +{ > + if (stream->enabled) > + return; > + > + /* Allow stream->ops->enable() to refer to this */ > + stream->enabled = true; > + > + if (stream->ops->enable) > + stream->ops->enable(stream); > +} > + > +static void i915_perf_disable_locked(struct i915_perf_stream *stream) > +{ > + if (!stream->enabled) > + return; > + > + /* Allow stream->ops->disable() to refer to this */ > + stream->enabled = false; > + > + if (stream->ops->disable) > + stream->ops->disable(stream); > +} > + > +static long i915_perf_ioctl_locked(struct i915_perf_stream *stream, > + unsigned int cmd, > + unsigned long arg) > +{ > + switch (cmd) { > + case I915_PERF_IOCTL_ENABLE: > + i915_perf_enable_locked(stream); > + return 0; > + case I915_PERF_IOCTL_DISABLE: > + i915_perf_disable_locked(stream); > + return 0; > + } > + > + return -EINVAL; > +} > + > +static long i915_perf_ioctl(struct file *file, > + unsigned int cmd, > + unsigned long arg) > +{ > + struct i915_perf_stream *stream = file->private_data; > + struct drm_i915_private *dev_priv = stream->dev_priv; > + long ret; > + > + mutex_lock(&dev_priv->perf.lock); > + ret = i915_perf_ioctl_locked(stream, cmd, arg); > + mutex_unlock(&dev_priv->perf.lock); > + > + return ret; > +} > + > +static void i915_perf_destroy_locked(struct i915_perf_stream *stream) > +{ > + struct drm_i915_private *dev_priv = stream->dev_priv; > + > + if (stream->enabled) > + i915_perf_disable_locked(stream); > + > + if (stream->ops->destroy) > + stream->ops->destroy(stream); > + > + list_del(&stream->link); > + > + if (stream->ctx) { > + mutex_lock(&dev_priv->drm.struct_mutex); > + i915_gem_context_put(stream->ctx); > + mutex_unlock(&dev_priv->drm.struct_mutex); > + } > + > + kfree(stream); > +} > + > +static int i915_perf_release(struct inode *inode, struct file *file) > +{ > + struct i915_perf_stream *stream = file->private_data; > + struct drm_i915_private *dev_priv = stream->dev_priv; > + > + mutex_lock(&dev_priv->perf.lock); > + i915_perf_destroy_locked(stream); > + mutex_unlock(&dev_priv->perf.lock); > + > + return 0; > +} > + > + > +static const struct file_operations fops = { > + .owner = THIS_MODULE, > + .llseek = no_llseek, > + .release = i915_perf_release, > + .poll = i915_perf_poll, > + .read = i915_perf_read, > + .unlocked_ioctl = i915_perf_ioctl, > +}; > + > + > +static struct i915_gem_context * > +lookup_context(struct drm_i915_private *dev_priv, > + struct drm_i915_file_private *file_priv, > + u32 ctx_user_handle) > +{ > + struct i915_gem_context *ctx; > + int ret; > + > + ret = i915_mutex_lock_interruptible(&dev_priv->drm); > + if (ret) > + return ERR_PTR(ret); > + > + ctx = i915_gem_context_lookup(file_priv, ctx_user_handle); > + if (!IS_ERR(ctx)) > + i915_gem_context_get(ctx); > + > + mutex_unlock(&dev_priv->drm.struct_mutex); > + > + return ctx; > +} > + > +int i915_perf_open_ioctl_locked(struct drm_device *dev, > + struct drm_i915_perf_open_param *param, > + struct perf_open_properties *props, > + struct drm_file *file) > +{ This should be static and also let's just make it take dev_priv directly. > + struct drm_i915_private *dev_priv = dev->dev_private; > + struct i915_gem_context *specific_ctx = NULL; > + struct i915_perf_stream *stream = NULL; > + unsigned long f_flags = 0; > + int stream_fd; > + int ret = 0; No need to initialize. > + > + if (props->single_context) { > + u32 ctx_handle = props->ctx_handle; > + struct drm_i915_file_private *file_priv = file->driver_priv; > + > + specific_ctx = lookup_context(dev_priv, file_priv, ctx_handle); > + if (IS_ERR(specific_ctx)) { > + ret = PTR_ERR(specific_ctx); > + if (ret != -EINTR) > + DRM_ERROR("Failed to look up context with ID %u for opening perf stream\n", > + ctx_handle); > + goto err; > + } > + } > + > + if (!specific_ctx && !capable(CAP_SYS_ADMIN)) { > + DRM_ERROR("Insufficient privileges to open system-wide i915 perf stream\n"); > + ret = -EACCES; > + goto err_ctx; > + } > + > + stream = kzalloc(sizeof(*stream), GFP_KERNEL); > + if (!stream) { > + ret = -ENOMEM; > + goto err_ctx; > + } > + > + stream->sample_flags = props->sample_flags; > + stream->dev_priv = dev_priv; > + stream->ctx = specific_ctx; > + > + /* > + * TODO: support sampling something > + * > + * For now this is as far as we can go. > + */ > + DRM_ERROR("Unsupported i915 perf stream configuration\n"); > + ret = -EINVAL; > + goto err_alloc; > + > + list_add(&stream->link, &dev_priv->perf.streams); > + > + if (param->flags & I915_PERF_FLAG_FD_CLOEXEC) > + f_flags |= O_CLOEXEC; > + if (param->flags & I915_PERF_FLAG_FD_NONBLOCK) > + f_flags |= O_NONBLOCK; > + > + stream_fd = anon_inode_getfd("[i915_perf]", &fops, stream, f_flags); > + if (stream_fd < 0) { > + ret = stream_fd; > + goto err_open; > + } > + > + if (!(param->flags & I915_PERF_FLAG_DISABLED)) > + i915_perf_enable_locked(stream); > + > + return stream_fd; > + > +err_open: > + list_del(&stream->link); > + if (stream->ops->destroy) > + stream->ops->destroy(stream); > +err_alloc: > + kfree(stream); > +err_ctx: > + if (specific_ctx) { > + mutex_lock(&dev_priv->drm.struct_mutex); > + i915_gem_context_put(specific_ctx); > + mutex_unlock(&dev_priv->drm.struct_mutex); > + } > +err: > + return ret; > +} > + > +/* Note we copy the properties from userspace outside of the i915 perf > + * mutex to avoid an awkward lockdep with mmap_sem. > + * > + * Note this function only validates properties in isolation it doesn't > + * validate that the combination of properties makes sense or that all > + * properties necessary for a particular kind of stream have been set. > + */ > +static int read_properties_unlocked(struct drm_i915_private *dev_priv, > + u64 __user *uprops, > + u32 n_props, > + struct perf_open_properties *props) > +{ > + u64 __user *uprop = uprops; > + int i; > + > + memset(props, 0, sizeof(struct perf_open_properties)); > + > + if (!n_props) { > + DRM_ERROR("No i915 perf properties given"); > + return -EINVAL; > + } > + > + if (n_props > DRM_I915_PERF_PROP_MAX) { > + DRM_ERROR("More i915 perf properties specified than exist"); > + return -EINVAL; > + } > + > + for (i = 0; i < n_props; i++) { > + u64 id, value; > + int ret; > + > + ret = get_user(id, (u64 __user *)uprop); > + if (ret) > + return ret; > + > + if (id == 0 || id >= DRM_I915_PERF_PROP_MAX) { > + DRM_ERROR("Unknown i915 perf property ID"); > + return -EINVAL; > + } > + > + ret = get_user(value, (u64 __user *)uprop + 1); > + if (ret) > + return ret; > + > + switch ((enum drm_i915_perf_property_id)id) { > + case DRM_I915_PERF_PROP_CTX_HANDLE: > + props->single_context = 1; > + props->ctx_handle = value; > + break; > + Don't bother with the new line here, it just gets removed later. > + case DRM_I915_PERF_PROP_MAX: > + BUG(); We already handle this case above, but I guess we still need this in order to silence gcc... > + } > + > + uprop += 2; > + } > + > + return 0; > +} > + > +int i915_perf_open_ioctl(struct drm_device *dev, void *data, > + struct drm_file *file) > +{ > + struct drm_i915_private *dev_priv = dev->dev_private; > + struct drm_i915_perf_open_param *param = data; > + struct perf_open_properties props; > + u32 known_open_flags = 0; No need to initialize. > + int ret; > + > + if (!dev_priv->perf.initialized) { > + DRM_ERROR("i915 perf interface not available for this system"); > + return -ENOTSUPP; > + } > + > + known_open_flags = I915_PERF_FLAG_FD_CLOEXEC | > + I915_PERF_FLAG_FD_NONBLOCK | > + I915_PERF_FLAG_DISABLED; > + if (param->flags & ~known_open_flags) { > + DRM_ERROR("Unknown drm_i915_perf_open_param flag\n"); > + return -EINVAL; > + } > + > + ret = read_properties_unlocked(dev_priv, > + u64_to_user_ptr(param->properties_ptr), > + param->num_properties, > + &props); > + if (ret) > + return ret; > + > + mutex_lock(&dev_priv->perf.lock); > + ret = i915_perf_open_ioctl_locked(dev, param, &props, file); > + mutex_unlock(&dev_priv->perf.lock); > + > + return ret; > +} > + > +void i915_perf_init(struct drm_i915_private *dev_priv) > +{ > + INIT_LIST_HEAD(&dev_priv->perf.streams); > + mutex_init(&dev_priv->perf.lock); > + > + dev_priv->perf.initialized = true; > +} > + > +void i915_perf_fini(struct drm_i915_private *dev_priv) > +{ > + if (!dev_priv->perf.initialized) > + return; > + > + /* Currently nothing to clean up */ > + > + dev_priv->perf.initialized = false; > +} > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h > index 03725fe..77fe79b 100644 > --- a/include/uapi/drm/i915_drm.h > +++ b/include/uapi/drm/i915_drm.h > @@ -258,6 +258,7 @@ typedef struct _drm_i915_sarea { > #define DRM_I915_GEM_USERPTR 0x33 > #define DRM_I915_GEM_CONTEXT_GETPARAM 0x34 > #define DRM_I915_GEM_CONTEXT_SETPARAM 0x35 > +#define DRM_I915_PERF_OPEN 0x36 > > #define DRM_IOCTL_I915_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t) > #define DRM_IOCTL_I915_FLUSH DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH) > @@ -311,6 +312,7 @@ typedef struct _drm_i915_sarea { > #define DRM_IOCTL_I915_GEM_USERPTR DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_USERPTR, struct drm_i915_gem_userptr) > #define DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_GETPARAM, struct drm_i915_gem_context_param) > #define DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_SETPARAM, struct drm_i915_gem_context_param) > +#define DRM_IOCTL_I915_PERF_OPEN DRM_IOR(DRM_COMMAND_BASE + DRM_I915_PERF_OPEN, struct drm_i915_perf_open_param) As you already pointed out this will need to be IOW. > > /* Allow drivers to submit batchbuffers directly to hardware, relying > * on the security mechanisms provided by hardware. > @@ -1222,6 +1224,71 @@ struct drm_i915_gem_context_param { > __u64 value; > }; > > +enum drm_i915_perf_property_id { > + /** > + * Open the stream for a specific context handle (as used with > + * execbuffer2). A stream opened for a specific context this way > + * won't typically require root privileges. > + */ > + DRM_I915_PERF_PROP_CTX_HANDLE = 1, > + > + DRM_I915_PERF_PROP_MAX /* non-ABI */ > +}; > + > +struct drm_i915_perf_open_param { > + __u32 flags; > +#define I915_PERF_FLAG_FD_CLOEXEC (1<<0) > +#define I915_PERF_FLAG_FD_NONBLOCK (1<<1) > +#define I915_PERF_FLAG_DISABLED (1<<2) > + > + /** The number of u64 (id, value) pairs */ > + __u32 num_properties; > + > + /** > + * Pointer to array of u64 (id, value) pairs configuring the stream > + * to open. > + */ > + __u64 __user properties_ptr; > +}; > + > +#define I915_PERF_IOCTL_ENABLE _IO('i', 0x0) > +#define I915_PERF_IOCTL_DISABLE _IO('i', 0x1) > + > +/** > + * Common to all i915 perf records > + */ > +struct drm_i915_perf_record_header { > + __u32 type; > + __u16 pad; > + __u16 size; > +}; > + > +enum drm_i915_perf_record_type { > + > + /** > + * Samples are the work horse record type whose contents are extensible > + * and defined when opening an i915 perf stream based on the given > + * properties. > + * > + * Boolean properties following the naming convention > + * DRM_I915_PERF_SAMPLE_xyz_PROP request the inclusion of 'xyz' data in > + * every sample. > + * > + * The order of these sample properties given by userspace has no > + * affect on the ordering of data within a sample. The order will be > + * documented here. > + * > + * struct { > + * struct drm_i915_perf_record_header header; > + * > + * TODO: itemize extensible sample data here > + * }; > + */ > + DRM_I915_PERF_RECORD_SAMPLE = 1, > + > + DRM_I915_PERF_RECORD_MAX /* non-ABI */ > +}; > + > #if defined(__cplusplus) > } > #endif > -- > 2.9.2 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/intel-gfx
On Fri, Oct 7, 2016 at 10:10 AM, Matthew Auld < matthew.william.auld@gmail.com> wrote: > On 14 September 2016 at 16:32, Robert Bragg <robert@sixbynine.org> wrote: > > > + > > +int i915_perf_open_ioctl_locked(struct drm_device *dev, > > + struct drm_i915_perf_open_param *param, > > + struct perf_open_properties *props, > > + struct drm_file *file) > > +{ > This should be static and also let's just make it take dev_priv directly. > Ah, yep, done. > > + case DRM_I915_PERF_PROP_MAX: > > + BUG(); > We already handle this case above, but I guess we still need this in > order to silence gcc... > right, and preferable to having a default: case, for the future compiler warning to handle any new properties here. > > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h > > index 03725fe..77fe79b 100644 > > --- a/include/uapi/drm/i915_drm.h > > +++ b/include/uapi/drm/i915_drm.h > > @@ -258,6 +258,7 @@ typedef struct _drm_i915_sarea { > > #define DRM_I915_GEM_USERPTR 0x33 > > #define DRM_I915_GEM_CONTEXT_GETPARAM 0x34 > > #define DRM_I915_GEM_CONTEXT_SETPARAM 0x35 > > +#define DRM_I915_PERF_OPEN 0x36 > > > > #define DRM_IOCTL_I915_INIT DRM_IOW( DRM_COMMAND_BASE + > DRM_I915_INIT, drm_i915_init_t) > > #define DRM_IOCTL_I915_FLUSH DRM_IO ( DRM_COMMAND_BASE + > DRM_I915_FLUSH) > > @@ -311,6 +312,7 @@ typedef struct _drm_i915_sarea { > > #define DRM_IOCTL_I915_GEM_USERPTR DRM_IOWR > (DRM_COMMAND_BASE + DRM_I915_GEM_USERPTR, struct drm_i915_gem_userptr) > > #define DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM DRM_IOWR > (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_GETPARAM, struct > drm_i915_gem_context_param) > > #define DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM DRM_IOWR > (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_SETPARAM, struct > drm_i915_gem_context_param) > > +#define DRM_IOCTL_I915_PERF_OPEN DRM_IOR(DRM_COMMAND_BASE + > DRM_I915_PERF_OPEN, struct drm_i915_perf_open_param) > As you already pointed out this will need to be IOW. > Yeah, changed locally after I realised the mistake here, just didn't get around to posting the patch. Also applied the notes to not redundantly init some vars, spurious new line, redundant include. Thanks, - Robert
On ti, 2016-10-11 at 12:03 -0700, Robert Bragg wrote: > > > + case DRM_I915_PERF_PROP_MAX: > > > + BUG(); > > > > We already handle this case above, but I guess we still need this in > > order to silence gcc... > > right, and preferable to having a default: case, for the future compiler warning to handle any new properties here. Please, do use MISSING_CASE instead. Daniel is known to get upset for far less ;) Generally consensus is that BUG() is used only when there're no other options to back out. Regards, Joonas
On Wed, Oct 12, 2016 at 12:41 PM, Joonas Lahtinen < joonas.lahtinen@linux.intel.com> wrote: > On ti, 2016-10-11 at 12:03 -0700, Robert Bragg wrote: > > > > + case DRM_I915_PERF_PROP_MAX: > > > > + BUG(); > > > > > > We already handle this case above, but I guess we still need this in > > > order to silence gcc... > > > > right, and preferable to having a default: case, for the future compiler > warning to handle any new properties here. > > Please, do use MISSING_CASE instead. Daniel is known to get upset for > far less ;) > > Generally consensus is that BUG() is used only when there're no other > options to back out. > thanks for this pointer. I'll add a default: with MISSING_CASE as that looks like an i915-specific convention; though it seems like a real shame to defer missing case issues to runtime errors instead of taking advantage of the compiler complaining at build time that a case has been forgotten. Thanks, - Robert > > Regards, Joonas > -- > Joonas Lahtinen > Open Source Technology Center > Intel Corporation >
On ke, 2016-10-19 at 17:35 +0100, Robert Bragg wrote: > I'll add a default: with MISSING_CASE as that looks like an i915- > specific convention; though it seems like a real shame to defer > missing case issues to runtime errors instead of taking advantage of > the compiler complaining at build time that a case has been > forgotten. I think the key point here is not "having MISSING_CASE", but "not having BUG". There has been talk about using compile time checking more effectively, so adding default is not needed. You can keep similar code construct but reduce into WARN_ONCE or so. Regards, Joonas
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index a998c2b..d991781 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -110,6 +110,9 @@ i915-y += dvo_ch7017.o \ # virtual gpu code i915-y += i915_vgpu.o +# perf code +i915-y += i915_perf.o + ifeq ($(CONFIG_DRM_I915_GVT),y) i915-y += intel_gvt.o include $(src)/gvt/Makefile diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 7f4e8ad..14f22fc 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -838,6 +838,8 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv, intel_device_info_dump(dev_priv); + i915_perf_init(dev_priv); + /* Not all pre-production machines fall into this category, only the * very first ones. Almost everything should work, except for maybe * suspend/resume. And we don't implement workarounds that affect only @@ -859,6 +861,7 @@ err_workqueues: */ static void i915_driver_cleanup_early(struct drm_i915_private *dev_priv) { + i915_perf_fini(dev_priv); i915_gem_load_cleanup(&dev_priv->drm); i915_workqueues_cleanup(dev_priv); } @@ -2560,6 +2563,7 @@ static const struct drm_ioctl_desc i915_ioctls[] = { DRM_IOCTL_DEF_DRV(I915_GEM_USERPTR, i915_gem_userptr_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_GETPARAM, i915_gem_context_getparam_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_SETPARAM, i915_gem_context_setparam_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(I915_PERF_OPEN, i915_perf_open_ioctl, DRM_RENDER_ALLOW), }; static struct drm_driver driver = { diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 1e2dda8..0f5cd8f 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1740,6 +1740,84 @@ struct intel_wm_config { bool sprites_scaled; }; +struct i915_perf_stream; + +struct i915_perf_stream_ops { + /* Enables the collection of HW samples, either in response to + * I915_PERF_IOCTL_ENABLE or implicitly called when stream is + * opened without I915_PERF_FLAG_DISABLED. + */ + void (*enable)(struct i915_perf_stream *stream); + + /* Disables the collection of HW samples, either in response to + * I915_PERF_IOCTL_DISABLE or implicitly called before + * destroying the stream. + */ + void (*disable)(struct i915_perf_stream *stream); + + /* Return: true if any i915 perf records are ready to read() + * for this stream. + */ + bool (*can_read)(struct i915_perf_stream *stream); + + /* Call poll_wait, passing a wait queue that will be woken + * once there is something ready to read() for the stream + */ + void (*poll_wait)(struct i915_perf_stream *stream, + struct file *file, + poll_table *wait); + + /* For handling a blocking read, wait until there is something + * to ready to read() for the stream. E.g. wait on the same + * wait queue that would be passed to poll_wait() until + * ->can_read() returns true (if its safe to call ->can_read() + * without the i915 perf lock held). + */ + int (*wait_unlocked)(struct i915_perf_stream *stream); + + /* read - Copy buffered metrics as records to userspace + * @buf: the userspace, destination buffer + * @count: the number of bytes to copy, requested by userspace + * @offset: zero at the start of the read, updated as the read + * proceeds, it represents how many bytes have been + * copied so far and the buffer offset for copying the + * next record. + * + * Copy as many buffered i915 perf samples and records for + * this stream to userspace as will fit in the given buffer. + * + * Only write complete records; returning -ENOSPC if there + * isn't room for a complete record. + * + * Return any error condition that results in a short read + * such as -ENOSPC or -EFAULT, even though these may be + * squashed before returning to userspace. + */ + int (*read)(struct i915_perf_stream *stream, + char __user *buf, + size_t count, + size_t *offset); + + /* Cleanup any stream specific resources. + * + * The stream will always be disabled before this is called. + */ + void (*destroy)(struct i915_perf_stream *stream); +}; + +struct i915_perf_stream { + struct drm_i915_private *dev_priv; + + struct list_head link; + + u32 sample_flags; + + struct i915_gem_context *ctx; + bool enabled; + + struct i915_perf_stream_ops *ops; +}; + struct drm_i915_private { struct drm_device drm; @@ -2040,6 +2118,12 @@ struct drm_i915_private { struct i915_runtime_pm pm; + struct { + bool initialized; + struct mutex lock; + struct list_head streams; + } perf; + /* Abstract the submission mechanism (legacy ringbuffer or execlists) away */ struct { void (*resume)(struct drm_i915_private *); @@ -3445,6 +3529,9 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data, int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, void *data, struct drm_file *file); +int i915_perf_open_ioctl(struct drm_device *dev, void *data, + struct drm_file *file); + /* i915_gem_evict.c */ int __must_check i915_gem_evict_something(struct i915_address_space *vm, u64 min_size, u64 alignment, @@ -3555,6 +3642,10 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine, u32 batch_len, bool is_master); +/* i915_perf.c */ +extern void i915_perf_init(struct drm_i915_private *dev_priv); +extern void i915_perf_fini(struct drm_i915_private *dev_priv); + /* i915_suspend.c */ extern int i915_save_state(struct drm_device *dev); extern int i915_restore_state(struct drm_device *dev); diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c new file mode 100644 index 0000000..87530f5 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -0,0 +1,448 @@ +/* + * Copyright © 2015-2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Robert Bragg <robert@sixbynine.org> + */ + +#include <linux/anon_inodes.h> +#include <linux/sizes.h> + +#include "i915_drv.h" + +struct perf_open_properties { + u32 sample_flags; + + u64 single_context:1; + u64 ctx_handle; +}; + +static ssize_t i915_perf_read_locked(struct i915_perf_stream *stream, + struct file *file, + char __user *buf, + size_t count, + loff_t *ppos) +{ + /* Note we keep the offset (aka bytes read) separate from any + * error status so that the final check for whether we return + * the bytes read with a higher precedence than any error (see + * comment below) doesn't need to be handled/duplicated in + * stream->ops->read() implementations. + */ + size_t offset = 0; + int ret = stream->ops->read(stream, buf, count, &offset); + + /* If we've successfully copied any data then reporting that + * takes precedence over any internal error status, so the + * data isn't lost. + * + * For example ret will be -ENOSPC whenever there is more + * buffered data than can be copied to userspace, but that's + * only interesting if we weren't able to copy some data + * because it implies the userspace buffer is too small to + * receive a single record (and we never split records). + * + * Another case with ret == -EFAULT is more of a grey area + * since it would seem like bad form for userspace to ask us + * to overrun its buffer, but the user knows best: + * + * http://yarchive.net/comp/linux/partial_reads_writes.html + */ + return offset ?: (ret ?: -EAGAIN); +} + +static ssize_t i915_perf_read(struct file *file, + char __user *buf, + size_t count, + loff_t *ppos) +{ + struct i915_perf_stream *stream = file->private_data; + struct drm_i915_private *dev_priv = stream->dev_priv; + ssize_t ret; + + if (!(file->f_flags & O_NONBLOCK)) { + /* Allow false positives from stream->ops->wait_unlocked. + */ + do { + ret = stream->ops->wait_unlocked(stream); + if (ret) + return ret; + + mutex_lock(&dev_priv->perf.lock); + ret = i915_perf_read_locked(stream, file, + buf, count, ppos); + mutex_unlock(&dev_priv->perf.lock); + } while (ret == -EAGAIN); + } else { + mutex_lock(&dev_priv->perf.lock); + ret = i915_perf_read_locked(stream, file, buf, count, ppos); + mutex_unlock(&dev_priv->perf.lock); + } + + return ret; +} + +static unsigned int i915_perf_poll_locked(struct i915_perf_stream *stream, + struct file *file, + poll_table *wait) +{ + unsigned int streams = 0; + + stream->ops->poll_wait(stream, file, wait); + + if (stream->ops->can_read(stream)) + streams |= POLLIN; + + return streams; +} + +static unsigned int i915_perf_poll(struct file *file, poll_table *wait) +{ + struct i915_perf_stream *stream = file->private_data; + struct drm_i915_private *dev_priv = stream->dev_priv; + int ret; + + mutex_lock(&dev_priv->perf.lock); + ret = i915_perf_poll_locked(stream, file, wait); + mutex_unlock(&dev_priv->perf.lock); + + return ret; +} + +static void i915_perf_enable_locked(struct i915_perf_stream *stream) +{ + if (stream->enabled) + return; + + /* Allow stream->ops->enable() to refer to this */ + stream->enabled = true; + + if (stream->ops->enable) + stream->ops->enable(stream); +} + +static void i915_perf_disable_locked(struct i915_perf_stream *stream) +{ + if (!stream->enabled) + return; + + /* Allow stream->ops->disable() to refer to this */ + stream->enabled = false; + + if (stream->ops->disable) + stream->ops->disable(stream); +} + +static long i915_perf_ioctl_locked(struct i915_perf_stream *stream, + unsigned int cmd, + unsigned long arg) +{ + switch (cmd) { + case I915_PERF_IOCTL_ENABLE: + i915_perf_enable_locked(stream); + return 0; + case I915_PERF_IOCTL_DISABLE: + i915_perf_disable_locked(stream); + return 0; + } + + return -EINVAL; +} + +static long i915_perf_ioctl(struct file *file, + unsigned int cmd, + unsigned long arg) +{ + struct i915_perf_stream *stream = file->private_data; + struct drm_i915_private *dev_priv = stream->dev_priv; + long ret; + + mutex_lock(&dev_priv->perf.lock); + ret = i915_perf_ioctl_locked(stream, cmd, arg); + mutex_unlock(&dev_priv->perf.lock); + + return ret; +} + +static void i915_perf_destroy_locked(struct i915_perf_stream *stream) +{ + struct drm_i915_private *dev_priv = stream->dev_priv; + + if (stream->enabled) + i915_perf_disable_locked(stream); + + if (stream->ops->destroy) + stream->ops->destroy(stream); + + list_del(&stream->link); + + if (stream->ctx) { + mutex_lock(&dev_priv->drm.struct_mutex); + i915_gem_context_put(stream->ctx); + mutex_unlock(&dev_priv->drm.struct_mutex); + } + + kfree(stream); +} + +static int i915_perf_release(struct inode *inode, struct file *file) +{ + struct i915_perf_stream *stream = file->private_data; + struct drm_i915_private *dev_priv = stream->dev_priv; + + mutex_lock(&dev_priv->perf.lock); + i915_perf_destroy_locked(stream); + mutex_unlock(&dev_priv->perf.lock); + + return 0; +} + + +static const struct file_operations fops = { + .owner = THIS_MODULE, + .llseek = no_llseek, + .release = i915_perf_release, + .poll = i915_perf_poll, + .read = i915_perf_read, + .unlocked_ioctl = i915_perf_ioctl, +}; + + +static struct i915_gem_context * +lookup_context(struct drm_i915_private *dev_priv, + struct drm_i915_file_private *file_priv, + u32 ctx_user_handle) +{ + struct i915_gem_context *ctx; + int ret; + + ret = i915_mutex_lock_interruptible(&dev_priv->drm); + if (ret) + return ERR_PTR(ret); + + ctx = i915_gem_context_lookup(file_priv, ctx_user_handle); + if (!IS_ERR(ctx)) + i915_gem_context_get(ctx); + + mutex_unlock(&dev_priv->drm.struct_mutex); + + return ctx; +} + +int i915_perf_open_ioctl_locked(struct drm_device *dev, + struct drm_i915_perf_open_param *param, + struct perf_open_properties *props, + struct drm_file *file) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct i915_gem_context *specific_ctx = NULL; + struct i915_perf_stream *stream = NULL; + unsigned long f_flags = 0; + int stream_fd; + int ret = 0; + + if (props->single_context) { + u32 ctx_handle = props->ctx_handle; + struct drm_i915_file_private *file_priv = file->driver_priv; + + specific_ctx = lookup_context(dev_priv, file_priv, ctx_handle); + if (IS_ERR(specific_ctx)) { + ret = PTR_ERR(specific_ctx); + if (ret != -EINTR) + DRM_ERROR("Failed to look up context with ID %u for opening perf stream\n", + ctx_handle); + goto err; + } + } + + if (!specific_ctx && !capable(CAP_SYS_ADMIN)) { + DRM_ERROR("Insufficient privileges to open system-wide i915 perf stream\n"); + ret = -EACCES; + goto err_ctx; + } + + stream = kzalloc(sizeof(*stream), GFP_KERNEL); + if (!stream) { + ret = -ENOMEM; + goto err_ctx; + } + + stream->sample_flags = props->sample_flags; + stream->dev_priv = dev_priv; + stream->ctx = specific_ctx; + + /* + * TODO: support sampling something + * + * For now this is as far as we can go. + */ + DRM_ERROR("Unsupported i915 perf stream configuration\n"); + ret = -EINVAL; + goto err_alloc; + + list_add(&stream->link, &dev_priv->perf.streams); + + if (param->flags & I915_PERF_FLAG_FD_CLOEXEC) + f_flags |= O_CLOEXEC; + if (param->flags & I915_PERF_FLAG_FD_NONBLOCK) + f_flags |= O_NONBLOCK; + + stream_fd = anon_inode_getfd("[i915_perf]", &fops, stream, f_flags); + if (stream_fd < 0) { + ret = stream_fd; + goto err_open; + } + + if (!(param->flags & I915_PERF_FLAG_DISABLED)) + i915_perf_enable_locked(stream); + + return stream_fd; + +err_open: + list_del(&stream->link); + if (stream->ops->destroy) + stream->ops->destroy(stream); +err_alloc: + kfree(stream); +err_ctx: + if (specific_ctx) { + mutex_lock(&dev_priv->drm.struct_mutex); + i915_gem_context_put(specific_ctx); + mutex_unlock(&dev_priv->drm.struct_mutex); + } +err: + return ret; +} + +/* Note we copy the properties from userspace outside of the i915 perf + * mutex to avoid an awkward lockdep with mmap_sem. + * + * Note this function only validates properties in isolation it doesn't + * validate that the combination of properties makes sense or that all + * properties necessary for a particular kind of stream have been set. + */ +static int read_properties_unlocked(struct drm_i915_private *dev_priv, + u64 __user *uprops, + u32 n_props, + struct perf_open_properties *props) +{ + u64 __user *uprop = uprops; + int i; + + memset(props, 0, sizeof(struct perf_open_properties)); + + if (!n_props) { + DRM_ERROR("No i915 perf properties given"); + return -EINVAL; + } + + if (n_props > DRM_I915_PERF_PROP_MAX) { + DRM_ERROR("More i915 perf properties specified than exist"); + return -EINVAL; + } + + for (i = 0; i < n_props; i++) { + u64 id, value; + int ret; + + ret = get_user(id, (u64 __user *)uprop); + if (ret) + return ret; + + if (id == 0 || id >= DRM_I915_PERF_PROP_MAX) { + DRM_ERROR("Unknown i915 perf property ID"); + return -EINVAL; + } + + ret = get_user(value, (u64 __user *)uprop + 1); + if (ret) + return ret; + + switch ((enum drm_i915_perf_property_id)id) { + case DRM_I915_PERF_PROP_CTX_HANDLE: + props->single_context = 1; + props->ctx_handle = value; + break; + + case DRM_I915_PERF_PROP_MAX: + BUG(); + } + + uprop += 2; + } + + return 0; +} + +int i915_perf_open_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_perf_open_param *param = data; + struct perf_open_properties props; + u32 known_open_flags = 0; + int ret; + + if (!dev_priv->perf.initialized) { + DRM_ERROR("i915 perf interface not available for this system"); + return -ENOTSUPP; + } + + known_open_flags = I915_PERF_FLAG_FD_CLOEXEC | + I915_PERF_FLAG_FD_NONBLOCK | + I915_PERF_FLAG_DISABLED; + if (param->flags & ~known_open_flags) { + DRM_ERROR("Unknown drm_i915_perf_open_param flag\n"); + return -EINVAL; + } + + ret = read_properties_unlocked(dev_priv, + u64_to_user_ptr(param->properties_ptr), + param->num_properties, + &props); + if (ret) + return ret; + + mutex_lock(&dev_priv->perf.lock); + ret = i915_perf_open_ioctl_locked(dev, param, &props, file); + mutex_unlock(&dev_priv->perf.lock); + + return ret; +} + +void i915_perf_init(struct drm_i915_private *dev_priv) +{ + INIT_LIST_HEAD(&dev_priv->perf.streams); + mutex_init(&dev_priv->perf.lock); + + dev_priv->perf.initialized = true; +} + +void i915_perf_fini(struct drm_i915_private *dev_priv) +{ + if (!dev_priv->perf.initialized) + return; + + /* Currently nothing to clean up */ + + dev_priv->perf.initialized = false; +} diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 03725fe..77fe79b 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -258,6 +258,7 @@ typedef struct _drm_i915_sarea { #define DRM_I915_GEM_USERPTR 0x33 #define DRM_I915_GEM_CONTEXT_GETPARAM 0x34 #define DRM_I915_GEM_CONTEXT_SETPARAM 0x35 +#define DRM_I915_PERF_OPEN 0x36 #define DRM_IOCTL_I915_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t) #define DRM_IOCTL_I915_FLUSH DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH) @@ -311,6 +312,7 @@ typedef struct _drm_i915_sarea { #define DRM_IOCTL_I915_GEM_USERPTR DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_USERPTR, struct drm_i915_gem_userptr) #define DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_GETPARAM, struct drm_i915_gem_context_param) #define DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_SETPARAM, struct drm_i915_gem_context_param) +#define DRM_IOCTL_I915_PERF_OPEN DRM_IOR(DRM_COMMAND_BASE + DRM_I915_PERF_OPEN, struct drm_i915_perf_open_param) /* Allow drivers to submit batchbuffers directly to hardware, relying * on the security mechanisms provided by hardware. @@ -1222,6 +1224,71 @@ struct drm_i915_gem_context_param { __u64 value; }; +enum drm_i915_perf_property_id { + /** + * Open the stream for a specific context handle (as used with + * execbuffer2). A stream opened for a specific context this way + * won't typically require root privileges. + */ + DRM_I915_PERF_PROP_CTX_HANDLE = 1, + + DRM_I915_PERF_PROP_MAX /* non-ABI */ +}; + +struct drm_i915_perf_open_param { + __u32 flags; +#define I915_PERF_FLAG_FD_CLOEXEC (1<<0) +#define I915_PERF_FLAG_FD_NONBLOCK (1<<1) +#define I915_PERF_FLAG_DISABLED (1<<2) + + /** The number of u64 (id, value) pairs */ + __u32 num_properties; + + /** + * Pointer to array of u64 (id, value) pairs configuring the stream + * to open. + */ + __u64 __user properties_ptr; +}; + +#define I915_PERF_IOCTL_ENABLE _IO('i', 0x0) +#define I915_PERF_IOCTL_DISABLE _IO('i', 0x1) + +/** + * Common to all i915 perf records + */ +struct drm_i915_perf_record_header { + __u32 type; + __u16 pad; + __u16 size; +}; + +enum drm_i915_perf_record_type { + + /** + * Samples are the work horse record type whose contents are extensible + * and defined when opening an i915 perf stream based on the given + * properties. + * + * Boolean properties following the naming convention + * DRM_I915_PERF_SAMPLE_xyz_PROP request the inclusion of 'xyz' data in + * every sample. + * + * The order of these sample properties given by userspace has no + * affect on the ordering of data within a sample. The order will be + * documented here. + * + * struct { + * struct drm_i915_perf_record_header header; + * + * TODO: itemize extensible sample data here + * }; + */ + DRM_I915_PERF_RECORD_SAMPLE = 1, + + DRM_I915_PERF_RECORD_MAX /* non-ABI */ +}; + #if defined(__cplusplus) } #endif
Adds base i915 perf infrastructure for Gen performance metrics. This adds a DRM_IOCTL_I915_PERF_OPEN ioctl that takes an array of uint64 properties to configure a stream of metrics and returns a new fd usable with standard VFS system calls including read() to read typed and sized records; ioctl() to enable or disable capture and poll() to wait for data. A stream is opened something like: uint64_t properties[] = { /* Single context sampling */ DRM_I915_PERF_PROP_CTX_HANDLE, ctx_handle, /* Include OA reports in samples */ DRM_I915_PERF_PROP_SAMPLE_OA, true, /* OA unit configuration */ DRM_I915_PERF_PROP_OA_METRICS_SET, metrics_set_id, DRM_I915_PERF_PROP_OA_FORMAT, report_format, DRM_I915_PERF_PROP_OA_EXPONENT, period_exponent, }; struct drm_i915_perf_open_param parm = { .flags = I915_PERF_FLAG_FD_CLOEXEC | I915_PERF_FLAG_FD_NONBLOCK | I915_PERF_FLAG_DISABLED, .properties_ptr = (uint64_t)properties, .num_properties = sizeof(properties) / 16, }; int fd = drmIoctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, ¶m); Records read all start with a common { type, size } header with DRM_I915_PERF_RECORD_SAMPLE being of most interest. Sample records contain an extensible number of fields and it's the DRM_I915_PERF_PROP_SAMPLE_xyz properties given when opening that determine what's included in every sample. No specific streams are supported yet so any attempt to open a stream will return an error. v4: s/DRM_IORW/DRM_IOR/ - Emil Velikov v3: update read() interface to avoid passing state struct - Chris Wilson fix some rebase fallout, with i915-perf init/deinit v2: use i915_gem_context_get() - Chris Wilson Signed-off-by: Robert Bragg <robert@sixbynine.org> --- drivers/gpu/drm/i915/Makefile | 3 + drivers/gpu/drm/i915/i915_drv.c | 4 + drivers/gpu/drm/i915/i915_drv.h | 91 ++++++++ drivers/gpu/drm/i915/i915_perf.c | 448 +++++++++++++++++++++++++++++++++++++++ include/uapi/drm/i915_drm.h | 67 ++++++ 5 files changed, 613 insertions(+) create mode 100644 drivers/gpu/drm/i915/i915_perf.c