Should interruptible version be used here, to allow for reads to beOn Thu, 2016-10-27 at 19:14 -0700, Robert Bragg wrote:
> Adds base i915 perf infrastructure for Gen performance metrics.
>
> This adds a DRM_IOCTL_I915_PERF_OPEN ioctl that takes an array of uint64
> properties to configure a stream of metrics and returns a new fd usable
> with standard VFS system calls including read() to read typed and sized
> records; ioctl() to enable or disable capture and poll() to wait for
> data.
>
> A stream is opened something like:
>
> uint64_t properties[] = {
> /* Single context sampling */
> DRM_I915_PERF_PROP_CTX_HANDLE, ctx_handle,
>
> /* Include OA reports in samples */
> DRM_I915_PERF_PROP_SAMPLE_OA, true,
>
> /* OA unit configuration */
> DRM_I915_PERF_PROP_OA_METRICS_SET, metrics_set_id,
> DRM_I915_PERF_PROP_OA_FORMAT, report_format,
> DRM_I915_PERF_PROP_OA_EXPONENT, period_exponent,
> };
> struct drm_i915_perf_open_param parm = {
> .flags = I915_PERF_FLAG_FD_CLOEXEC |
> I915_PERF_FLAG_FD_NONBLOCK |
> I915_PERF_FLAG_DISABLED,
> .properties_ptr = (uint64_t)properties,
> .num_properties = sizeof(properties) / 16,
> };
> int fd = drmIoctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, ¶m);
>
> Records read all start with a common { type, size } header with
> DRM_I915_PERF_RECORD_SAMPLE being of most interest. Sample records
> contain an extensible number of fields and it's the
> DRM_I915_PERF_PROP_SAMPLE_xyz properties given when opening that
> determine what's included in every sample.
>
> No specific streams are supported yet so any attempt to open a stream
> will return an error.
>
> v2:
> use i915_gem_context_get() - Chris Wilson
> v3:
> update read() interface to avoid passing state struct - Chris Wilson
> fix some rebase fallout, with i915-perf init/deinit
> v4:
> s/DRM_IORW/DRM_IOW/ - Emil Velikov
>
> Signed-off-by: Robert Bragg <robert@sixbynine.org>
> ---
> drivers/gpu/drm/i915/Makefile | 3 +
> drivers/gpu/drm/i915/i915_drv.c | 4 +
> drivers/gpu/drm/i915/i915_drv.h | 91 ++++++++
> drivers/gpu/drm/i915/i915_perf.c | 443 ++++++++++++++++++++++++++++++ +++++++++
> include/uapi/drm/i915_drm.h | 67 ++++++
> 5 files changed, 608 insertions(+)
> create mode 100644 drivers/gpu/drm/i915/i915_perf.c
>
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/ Makefile
> index 6123400..8d4e25f 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -113,6 +113,9 @@ i915-$(CONFIG_DRM_I915_CAPTURE_ERROR) += i915_gpu_error.o
> # virtual gpu code
> i915-y += i915_vgpu.o
>
> +# perf code
> +i915-y += i915_perf.o
> +
> ifeq ($(CONFIG_DRM_I915_GVT),y)
> i915-y += intel_gvt.o
> include $(src)/gvt/Makefile
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_ drv.c
> index af3559d..685c96e 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -836,6 +836,8 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv,
>
> intel_detect_preproduction_hw(dev_priv);
>
> + i915_perf_init(dev_priv);
> +
> return 0;
>
> err_workqueues:
> @@ -849,6 +851,7 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv,
> */
> static void i915_driver_cleanup_early(struct drm_i915_private *dev_priv)
> {
> + i915_perf_fini(dev_priv);
> i915_gem_load_cleanup(&dev_priv->drm);
> i915_workqueues_cleanup(dev_priv);
> }
> @@ -2556,6 +2559,7 @@ static const struct drm_ioctl_desc i915_ioctls[] = {
> DRM_IOCTL_DEF_DRV(I915_GEM_USERPTR, i915_gem_userptr_ioctl, DRM_RENDER_ALLOW),
> DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_GETPARAM, i915_gem_context_getparam_ ioctl, DRM_RENDER_ALLOW),
> DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_SETPARAM, i915_gem_context_setparam_ ioctl, DRM_RENDER_ALLOW),
> + DRM_IOCTL_DEF_DRV(I915_PERF_OPEN, i915_perf_open_ioctl, DRM_RENDER_ALLOW),
> };
>
> static struct drm_driver driver = {
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_ drv.h
> index 5a260db..7a65c0b 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1767,6 +1767,84 @@ struct intel_wm_config {
> bool sprites_scaled;
> };
>
> +struct i915_perf_stream;
> +
> +struct i915_perf_stream_ops {
> + /* Enables the collection of HW samples, either in response to
> + * I915_PERF_IOCTL_ENABLE or implicitly called when stream is
> + * opened without I915_PERF_FLAG_DISABLED.
> + */
> + void (*enable)(struct i915_perf_stream *stream);
> +
> + /* Disables the collection of HW samples, either in response to
> + * I915_PERF_IOCTL_DISABLE or implicitly called before
> + * destroying the stream.
> + */
> + void (*disable)(struct i915_perf_stream *stream);
> +
> + /* Return: true if any i915 perf records are ready to read()
> + * for this stream.
> + */
> + bool (*can_read)(struct i915_perf_stream *stream);
> +
> + /* Call poll_wait, passing a wait queue that will be woken
> + * once there is something ready to read() for the stream
> + */
> + void (*poll_wait)(struct i915_perf_stream *stream,
> + struct file *file,
> + poll_table *wait);
> +
> + /* For handling a blocking read, wait until there is something
> + * to ready to read() for the stream. E.g. wait on the same
> + * wait queue that would be passed to poll_wait() until
> + * ->can_read() returns true (if its safe to call ->can_read()
> + * without the i915 perf lock held).
> + */
> + int (*wait_unlocked)(struct i915_perf_stream *stream);
> +
> + /* read - Copy buffered metrics as records to userspace
> + * @buf: the userspace, destination buffer
> + * @count: the number of bytes to copy, requested by userspace
> + * @offset: zero at the start of the read, updated as the read
> + * proceeds, it represents how many bytes have been
> + * copied so far and the buffer offset for copying the
> + * next record.
> + *
> + * Copy as many buffered i915 perf samples and records for
> + * this stream to userspace as will fit in the given buffer.
> + *
> + * Only write complete records; returning -ENOSPC if there
> + * isn't room for a complete record.
> + *
> + * Return any error condition that results in a short read
> + * such as -ENOSPC or -EFAULT, even though these may be
> + * squashed before returning to userspace.
> + */
> + int (*read)(struct i915_perf_stream *stream,
> + char __user *buf,
> + size_t count,
> + size_t *offset);
> +
> + /* Cleanup any stream specific resources.
> + *
> + * The stream will always be disabled before this is called.
> + */
> + void (*destroy)(struct i915_perf_stream *stream);
> +};
> +
> +struct i915_perf_stream {
> + struct drm_i915_private *dev_priv;
> +
> + struct list_head link;
> +
> + u32 sample_flags;
> +
> + struct i915_gem_context *ctx;
> + bool enabled;
> +
> + struct i915_perf_stream_ops *ops;
> +};
> +
> struct drm_i915_private {
> struct drm_device drm;
>
> @@ -2069,6 +2147,12 @@ struct drm_i915_private {
>
> struct i915_runtime_pm pm;
>
> + struct {
> + bool initialized;
> + struct mutex lock;
> + struct list_head streams;
> + } perf;
> +
> /* Abstract the submission mechanism (legacy ringbuffer or execlists) away */
> struct {
> void (*resume)(struct drm_i915_private *);
> @@ -3482,6 +3566,9 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
> int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, void *data,
> struct drm_file *file);
>
> +int i915_perf_open_ioctl(struct drm_device *dev, void *data,
> + struct drm_file *file);
> +
> /* i915_gem_evict.c */
> int __must_check i915_gem_evict_something(struct i915_address_space *vm,
> u64 min_size, u64 alignment,
> @@ -3607,6 +3694,10 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
> u32 batch_len,
> bool is_master);
>
> +/* i915_perf.c */
> +extern void i915_perf_init(struct drm_i915_private *dev_priv);
> +extern void i915_perf_fini(struct drm_i915_private *dev_priv);
> +
> /* i915_suspend.c */
> extern int i915_save_state(struct drm_device *dev);
> extern int i915_restore_state(struct drm_device *dev);
> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_ perf.c
> new file mode 100644
> index 0000000..c45cf92
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -0,0 +1,443 @@
> +/*
> + * Copyright © 2015-2016 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + *
> + * Authors:
> + * Robert Bragg <robert@sixbynine.org>
> + */
> +
> +#include <linux/anon_inodes.h>
> +
> +#include "i915_drv.h"
> +
> +struct perf_open_properties {
> + u32 sample_flags;
> +
> + u64 single_context:1;
> + u64 ctx_handle;
> +};
> +
> +static ssize_t i915_perf_read_locked(struct i915_perf_stream *stream,
> + struct file *file,
> + char __user *buf,
> + size_t count,
> + loff_t *ppos)
> +{
> + /* Note we keep the offset (aka bytes read) separate from any
> + * error status so that the final check for whether we return
> + * the bytes read with a higher precedence than any error (see
> + * comment below) doesn't need to be handled/duplicated in
> + * stream->ops->read() implementations.
> + */
> + size_t offset = 0;
> + int ret = stream->ops->read(stream, buf, count, &offset);
> +
> + /* If we've successfully copied any data then reporting that
> + * takes precedence over any internal error status, so the
> + * data isn't lost.
> + *
> + * For example ret will be -ENOSPC whenever there is more
> + * buffered data than can be copied to userspace, but that's
> + * only interesting if we weren't able to copy some data
> + * because it implies the userspace buffer is too small to
> + * receive a single record (and we never split records).
> + *
> + * Another case with ret == -EFAULT is more of a grey area
> + * since it would seem like bad form for userspace to ask us
> + * to overrun its buffer, but the user knows best:
> + *
> + * http://yarchive.net/comp/linux/partial_reads_writes. html
> + */
> + return offset ?: (ret ?: -EAGAIN);
> +}
> +
> +static ssize_t i915_perf_read(struct file *file,
> + char __user *buf,
> + size_t count,
> + loff_t *ppos)
> +{
> + struct i915_perf_stream *stream = file->private_data;
> + struct drm_i915_private *dev_priv = stream->dev_priv;
> + ssize_t ret;
> +
> + if (!(file->f_flags & O_NONBLOCK)) {
> + /* Allow false positives from stream->ops->wait_unlocked.
> + */
> + do {
> + ret = stream->ops->wait_unlocked(stream);
> + if (ret)
> + return ret;
> +
> + mutex_lock(&dev_priv->perf.lock);
interrupted?