From: Rob Clark robdclark@chromium.org
The DEFINE_DRM_GEM_FOPS() helper is a bit limiting if a driver wants to provide additional file ops, like show_fdinfo().
Signed-off-by: Rob Clark robdclark@chromium.org --- include/drm/drm_gem.h | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-)
diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h index 9d7c61a122dc..dc88d4a2cdf6 100644 --- a/include/drm/drm_gem.h +++ b/include/drm/drm_gem.h @@ -314,6 +314,23 @@ struct drm_gem_object { const struct drm_gem_object_funcs *funcs; };
+/** + * DRM_GEM_FOPS - Default drm GEM file operations + * + * This macro provides a shorthand for setting the GEM file ops in the + * &file_operations structure. + */ +#define DRM_GEM_FOPS \ + .open = drm_open,\ + .release = drm_release,\ + .unlocked_ioctl = drm_ioctl,\ + .compat_ioctl = drm_compat_ioctl,\ + .poll = drm_poll,\ + .read = drm_read,\ + .llseek = noop_llseek,\ + .mmap = drm_gem_mmap + + /** * DEFINE_DRM_GEM_FOPS() - macro to generate file operations for GEM drivers * @name: name for the generated structure @@ -330,14 +347,7 @@ struct drm_gem_object { #define DEFINE_DRM_GEM_FOPS(name) \ static const struct file_operations name = {\ .owner = THIS_MODULE,\ - .open = drm_open,\ - .release = drm_release,\ - .unlocked_ioctl = drm_ioctl,\ - .compat_ioctl = drm_compat_ioctl,\ - .poll = drm_poll,\ - .read = drm_read,\ - .llseek = noop_llseek,\ - .mmap = drm_gem_mmap,\ + DRM_GEM_FOPS,\ }
void drm_gem_object_release(struct drm_gem_object *obj);
From: Rob Clark robdclark@chromium.org
Similar to AMD commit 874442541133 ("drm/amdgpu: Add show_fdinfo() interface"), using the infrastructure added in previous patches, we add basic client info and GPU engine utilisation for msm.
Example output:
# cat /proc/`pgrep glmark2`/fdinfo/6 pos: 0 flags: 02400002 mnt_id: 21 ino: 162 drm-driver: msm drm-client-id: 7 drm-engine-gpu: 1734371319 ns drm-cycles-gpu: 1153645024 drm-maxfreq-gpu: 800000000 Hz
See also: https://patchwork.freedesktop.org/patch/468505/
Signed-off-by: Rob Clark robdclark@chromium.org --- Documentation/gpu/drm-usage-stats.rst | 21 +++++++++++++++++++++ drivers/gpu/drm/msm/msm_drv.c | 19 ++++++++++++++++++- drivers/gpu/drm/msm/msm_gpu.c | 21 +++++++++++++++++++-- drivers/gpu/drm/msm/msm_gpu.h | 19 +++++++++++++++++++ 4 files changed, 77 insertions(+), 3 deletions(-)
diff --git a/Documentation/gpu/drm-usage-stats.rst b/Documentation/gpu/drm-usage-stats.rst index 6c9f166a8d6f..60e5cc9c13ad 100644 --- a/Documentation/gpu/drm-usage-stats.rst +++ b/Documentation/gpu/drm-usage-stats.rst @@ -105,6 +105,27 @@ object belong to this client, in the respective memory region. Default unit shall be bytes with optional unit specifiers of 'KiB' or 'MiB' indicating kibi- or mebi-bytes.
+- drm-cycles-<str> <uint> + +Engine identifier string must be the same as the one specified in the +drm-engine-<str> tag and shall contain the number of busy cycles for the given +engine. + +Values are not required to be constantly monotonic if it makes the driver +implementation easier, but are required to catch up with the previously reported +larger value within a reasonable period. Upon observing a value lower than what +was previously read, userspace is expected to stay with that larger previous +value until a monotonic update is seen. + +- drm-maxfreq-<str> <uint> [Hz|MHz|KHz] + +Engine identifier string must be the same as the one specified in the +drm-engine-<str> tag and shall contain the maxium frequence for the given +engine. Taken together with drm-cycles-<str>, this can be used to calculate +percentage utilization of the engine, whereas drm-engine-<str> only refects +time active without considering what frequency the engine is operating as a +percentage of it's maximum frequency. + =============================== Driver specific implementations =============================== diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index 14ab9a627d8b..57a66093e671 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -948,7 +948,24 @@ static const struct drm_ioctl_desc msm_ioctls[] = { DRM_IOCTL_DEF_DRV(MSM_SUBMITQUEUE_QUERY, msm_ioctl_submitqueue_query, DRM_RENDER_ALLOW), };
-DEFINE_DRM_GEM_FOPS(fops); +static void msm_fop_show_fdinfo(struct seq_file *m, struct file *f) +{ + struct drm_file *file = f->private_data; + struct drm_device *dev = file->minor->dev; + struct msm_drm_private *priv = dev->dev_private; + struct drm_printer p = drm_seq_file_printer(m); + + if (!priv->gpu) + return; + + msm_gpu_show_fdinfo(priv->gpu, file->driver_priv, &p); +} + +static const struct file_operations fops = { + .owner = THIS_MODULE, + DRM_GEM_FOPS, + .show_fdinfo = msm_fop_show_fdinfo, +};
static const struct drm_driver msm_driver = { .driver_features = DRIVER_GEM | diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index eb8a6663f309..333a9a299b41 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -4,6 +4,8 @@ * Author: Rob Clark robdclark@gmail.com */
+#include "drm/drm_drv.h" + #include "msm_gpu.h" #include "msm_gem.h" #include "msm_mmu.h" @@ -146,6 +148,16 @@ int msm_gpu_pm_suspend(struct msm_gpu *gpu) return 0; }
+void msm_gpu_show_fdinfo(struct msm_gpu *gpu, struct msm_file_private *ctx, + struct drm_printer *p) +{ + drm_printf(p, "drm-driver:\t%s\n", gpu->dev->driver->name); + drm_printf(p, "drm-client-id:\t%u\n", ctx->seqno); + drm_printf(p, "drm-engine-gpu:\t%llu ns\n", ctx->elapsed_ns); + drm_printf(p, "drm-cycles-gpu:\t%llu\n", ctx->cycles); + drm_printf(p, "drm-maxfreq-gpu:\t%lu Hz\n", gpu->fast_rate); +} + int msm_gpu_hw_init(struct msm_gpu *gpu) { int ret; @@ -652,7 +664,7 @@ static void retire_submit(struct msm_gpu *gpu, struct msm_ringbuffer *ring, { int index = submit->seqno % MSM_GPU_SUBMIT_STATS_COUNT; volatile struct msm_gpu_submit_stats *stats; - u64 elapsed, clock = 0; + u64 elapsed, clock = 0, cycles; unsigned long flags;
stats = &ring->memptrs->stats[index]; @@ -660,12 +672,17 @@ static void retire_submit(struct msm_gpu *gpu, struct msm_ringbuffer *ring, elapsed = (stats->alwayson_end - stats->alwayson_start) * 10000; do_div(elapsed, 192);
+ cycles = stats->cpcycles_end - stats->cpcycles_start; + /* Calculate the clock frequency from the number of CP cycles */ if (elapsed) { - clock = (stats->cpcycles_end - stats->cpcycles_start) * 1000; + clock = cycles * 1000; do_div(clock, elapsed); }
+ submit->queue->ctx->elapsed_ns += elapsed; + submit->queue->ctx->cycles += cycles; + trace_msm_gpu_submit_retired(submit, elapsed, clock, stats->alwayson_start, stats->alwayson_end);
diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h index 6def00883046..4911943ba53b 100644 --- a/drivers/gpu/drm/msm/msm_gpu.h +++ b/drivers/gpu/drm/msm/msm_gpu.h @@ -361,6 +361,22 @@ struct msm_file_private { /** cmdline: Overridden task cmdline, see MSM_PARAM_CMDLINE */ char *cmdline;
+ /** + * elapsed: + * + * The total (cumulative) elapsed time GPU was busy with rendering + * from this context in ns. + */ + uint64_t elapsed_ns; + + /** + * cycles: + * + * The total (cumulative) GPU cycles elapsed attributed to this + * context. + */ + uint64_t cycles; + /** * entities: * @@ -544,6 +560,9 @@ static inline void gpu_write64(struct msm_gpu *gpu, u32 lo, u32 hi, u64 val) int msm_gpu_pm_suspend(struct msm_gpu *gpu); int msm_gpu_pm_resume(struct msm_gpu *gpu);
+void msm_gpu_show_fdinfo(struct msm_gpu *gpu, struct msm_file_private *ctx, + struct drm_printer *p); + int msm_submitqueue_init(struct drm_device *drm, struct msm_file_private *ctx); struct msm_gpu_submitqueue *msm_submitqueue_get(struct msm_file_private *ctx, u32 id);
Hi Rob,
I love your patch! Perhaps something to improve:
[auto build test WARNING on drm/drm-next] [also build test WARNING on drm-intel/for-linux-next drm-tip/drm-tip v5.19-rc1 next-20220607] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch]
url: https://github.com/intel-lab-lkp/linux/commits/Rob-Clark/drm-Add-DRM_GEM_FOP... base: git://anongit.freedesktop.org/drm/drm drm-next config: s390-buildonly-randconfig-r008-20220605 (https://download.01.org/0day-ci/archive/20220607/202206071325.FWDwmg2D-lkp@i...) compiler: clang version 15.0.0 (https://github.com/llvm/llvm-project b92436efcb7813fc481b30f2593a4907568d917a) reproduce (this is a W=1 build): wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # install s390 cross compiling tool for clang build # apt-get install binutils-s390x-linux-gnu # https://github.com/intel-lab-lkp/linux/commit/09342d3c56fa77dacb235908515f0a... git remote add linux-review https://github.com/intel-lab-lkp/linux git fetch --no-tags linux-review Rob-Clark/drm-Add-DRM_GEM_FOPS/20220607-035529 git checkout 09342d3c56fa77dacb235908515f0a44ac2fc9c2 # save the config file mkdir build_dir && cp config build_dir/.config COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=s390 SHELL=/bin/bash drivers/gpu/drm/amd/amdgpu/ drivers/gpu/drm/msm/
If you fix the issue, kindly add following tag where applicable Reported-by: kernel test robot lkp@intel.com
All warnings (new ones prefixed by >>):
In file included from drivers/gpu/drm/msm/msm_gpu.c:9: In file included from drivers/gpu/drm/msm/msm_gpu.h:10: In file included from include/linux/adreno-smmu-priv.h:9: In file included from include/linux/io-pgtable.h:6: In file included from include/linux/iommu.h:10: In file included from include/linux/scatterlist.h:9: In file included from arch/s390/include/asm/io.h:75: include/asm-generic/io.h:464:31: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic] val = __raw_readb(PCI_IOBASE + addr); ~~~~~~~~~~ ^ include/asm-generic/io.h:477:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic] val = __le16_to_cpu((__le16 __force)__raw_readw(PCI_IOBASE + addr)); ~~~~~~~~~~ ^ include/uapi/linux/byteorder/big_endian.h:37:59: note: expanded from macro '__le16_to_cpu' #define __le16_to_cpu(x) __swab16((__force __u16)(__le16)(x)) ^ include/uapi/linux/swab.h:102:54: note: expanded from macro '__swab16' #define __swab16(x) (__u16)__builtin_bswap16((__u16)(x)) ^ In file included from drivers/gpu/drm/msm/msm_gpu.c:9: In file included from drivers/gpu/drm/msm/msm_gpu.h:10: In file included from include/linux/adreno-smmu-priv.h:9: In file included from include/linux/io-pgtable.h:6: In file included from include/linux/iommu.h:10: In file included from include/linux/scatterlist.h:9: In file included from arch/s390/include/asm/io.h:75: include/asm-generic/io.h:490:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic] val = __le32_to_cpu((__le32 __force)__raw_readl(PCI_IOBASE + addr)); ~~~~~~~~~~ ^ include/uapi/linux/byteorder/big_endian.h:35:59: note: expanded from macro '__le32_to_cpu' #define __le32_to_cpu(x) __swab32((__force __u32)(__le32)(x)) ^ include/uapi/linux/swab.h:115:54: note: expanded from macro '__swab32' #define __swab32(x) (__u32)__builtin_bswap32((__u32)(x)) ^ In file included from drivers/gpu/drm/msm/msm_gpu.c:9: In file included from drivers/gpu/drm/msm/msm_gpu.h:10: In file included from include/linux/adreno-smmu-priv.h:9: In file included from include/linux/io-pgtable.h:6: In file included from include/linux/iommu.h:10: In file included from include/linux/scatterlist.h:9: In file included from arch/s390/include/asm/io.h:75: include/asm-generic/io.h:501:33: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic] __raw_writeb(value, PCI_IOBASE + addr); ~~~~~~~~~~ ^ include/asm-generic/io.h:511:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic] __raw_writew((u16 __force)cpu_to_le16(value), PCI_IOBASE + addr); ~~~~~~~~~~ ^ include/asm-generic/io.h:521:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic] __raw_writel((u32 __force)cpu_to_le32(value), PCI_IOBASE + addr); ~~~~~~~~~~ ^ include/asm-generic/io.h:609:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic] readsb(PCI_IOBASE + addr, buffer, count); ~~~~~~~~~~ ^ include/asm-generic/io.h:617:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic] readsw(PCI_IOBASE + addr, buffer, count); ~~~~~~~~~~ ^ include/asm-generic/io.h:625:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic] readsl(PCI_IOBASE + addr, buffer, count); ~~~~~~~~~~ ^ include/asm-generic/io.h:634:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic] writesb(PCI_IOBASE + addr, buffer, count); ~~~~~~~~~~ ^ include/asm-generic/io.h:643:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic] writesw(PCI_IOBASE + addr, buffer, count); ~~~~~~~~~~ ^ include/asm-generic/io.h:652:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic] writesl(PCI_IOBASE + addr, buffer, count); ~~~~~~~~~~ ^
drivers/gpu/drm/msm/msm_gpu.c:158:46: warning: format specifies type 'unsigned long' but the argument has type 'uint32_t' (aka 'unsigned int') [-Wformat]
drm_printf(p, "drm-maxfreq-gpu:\t%lu Hz\n", gpu->fast_rate); ~~~ ^~~~~~~~~~~~~~ %u In file included from drivers/gpu/drm/msm/msm_gpu.c:7: In file included from include/drm/drm_drv.h:33: In file included from include/drm/drm_device.h:5: In file included from include/linux/kref.h:16: In file included from include/linux/spinlock.h:93: arch/s390/include/asm/spinlock.h:81:3: error: expected absolute expression ALTERNATIVE("", ".insn rre,0xb2fa0000,7,0", 49) /* NIAI 7 */ ^ arch/s390/include/asm/alternative.h:118:2: note: expanded from macro 'ALTERNATIVE' ALTINSTR_REPLACEMENT(altinstr, 1) \ ^ arch/s390/include/asm/alternative.h:113:2: note: expanded from macro 'ALTINSTR_REPLACEMENT' INSTR_LEN_SANITY_CHECK(altinstr_len(num)) ^ arch/s390/include/asm/alternative.h:62:3: note: expanded from macro 'INSTR_LEN_SANITY_CHECK' ".if " len " > 254\n" \ ^ <inline asm>:5:5: note: instantiated into assembly here .if 6651b-6641b > 254 ^ In file included from drivers/gpu/drm/msm/msm_gpu.c:7: In file included from include/drm/drm_drv.h:33: In file included from include/drm/drm_device.h:5: In file included from include/linux/kref.h:16: In file included from include/linux/spinlock.h:93: arch/s390/include/asm/spinlock.h:81:3: error: cpu alternatives does not support instructions blocks > 254 bytes ALTERNATIVE("", ".insn rre,0xb2fa0000,7,0", 49) /* NIAI 7 */ ^ arch/s390/include/asm/alternative.h:118:2: note: expanded from macro 'ALTERNATIVE' ALTINSTR_REPLACEMENT(altinstr, 1) \ ^ arch/s390/include/asm/alternative.h:113:2: note: expanded from macro 'ALTINSTR_REPLACEMENT' INSTR_LEN_SANITY_CHECK(altinstr_len(num)) ^ arch/s390/include/asm/alternative.h:63:3: note: expanded from macro 'INSTR_LEN_SANITY_CHECK' "\t.error "cpu alternatives does not support instructions " \ ^ <inline asm>:6:2: note: instantiated into assembly here .error "cpu alternatives does not support instructions blocks > 254 bytes" ^ In file included from drivers/gpu/drm/msm/msm_gpu.c:7: In file included from include/drm/drm_drv.h:33: In file included from include/drm/drm_device.h:5: In file included from include/linux/kref.h:16: In file included from include/linux/spinlock.h:93: arch/s390/include/asm/spinlock.h:81:3: error: expected absolute expression ALTERNATIVE("", ".insn rre,0xb2fa0000,7,0", 49) /* NIAI 7 */ ^ arch/s390/include/asm/alternative.h:118:2: note: expanded from macro 'ALTERNATIVE' ALTINSTR_REPLACEMENT(altinstr, 1) \ ^ arch/s390/include/asm/alternative.h:113:2: note: expanded from macro 'ALTINSTR_REPLACEMENT' INSTR_LEN_SANITY_CHECK(altinstr_len(num)) ^ arch/s390/include/asm/alternative.h:66:3: note: expanded from macro 'INSTR_LEN_SANITY_CHECK' ".if (" len ") %% 2\n" \ ^ <inline asm>:8:5: note: instantiated into assembly here .if (6651b-6641b) % 2 ^ In file included from drivers/gpu/drm/msm/msm_gpu.c:7: In file included from include/drm/drm_drv.h:33: In file included from include/drm/drm_device.h:5: In file included from include/linux/kref.h:16: In file included from include/linux/spinlock.h:93: arch/s390/include/asm/spinlock.h:81:3: error: cpu alternatives instructions length is odd ALTERNATIVE("", ".insn rre,0xb2fa0000,7,0", 49) /* NIAI 7 */ ^ arch/s390/include/asm/alternative.h:118:2: note: expanded from macro 'ALTERNATIVE' ALTINSTR_REPLACEMENT(altinstr, 1) \ ^ arch/s390/include/asm/alternative.h:113:2: note: expanded from macro 'ALTINSTR_REPLACEMENT' INSTR_LEN_SANITY_CHECK(altinstr_len(num)) ^ arch/s390/include/asm/alternative.h:67:3: note: expanded from macro 'INSTR_LEN_SANITY_CHECK' "\t.error "cpu alternatives instructions length is odd"\n" \ ^ <inline asm>:9:2: note: instantiated into assembly here .error "cpu alternatives instructions length is odd" ^ In file included from drivers/gpu/drm/msm/msm_gpu.c:7: In file included from include/drm/drm_drv.h:33: In file included from include/drm/drm_device.h:5: In file included from include/linux/kref.h:16: In file included from include/linux/spinlock.h:93: arch/s390/include/asm/spinlock.h:81:3: error: expected absolute expression ALTERNATIVE("", ".insn rre,0xb2fa0000,7,0", 49) /* NIAI 7 */ ^ arch/s390/include/asm/alternative.h:120:2: note: expanded from macro 'ALTERNATIVE' OLDINSTR(oldinstr, 1) \ ^ arch/s390/include/asm/alternative.h:90:2: note: expanded from macro 'OLDINSTR' OLDINSTR_PADDING(oldinstr, num) \ ^ arch/s390/include/asm/alternative.h:71:3: note: expanded from macro 'OLDINSTR_PADDING' ".if " oldinstr_pad_len(num) " > 6\n" \ ^
vim +158 drivers/gpu/drm/msm/msm_gpu.c
150 151 void msm_gpu_show_fdinfo(struct msm_gpu *gpu, struct msm_file_private *ctx, 152 struct drm_printer *p) 153 { 154 drm_printf(p, "drm-driver:\t%s\n", gpu->dev->driver->name); 155 drm_printf(p, "drm-client-id:\t%u\n", ctx->seqno); 156 drm_printf(p, "drm-engine-gpu:\t%llu ns\n", ctx->elapsed_ns); 157 drm_printf(p, "drm-cycles-gpu:\t%llu\n", ctx->cycles);
158 drm_printf(p, "drm-maxfreq-gpu:\t%lu Hz\n", gpu->fast_rate);
159 } 160
On 06/06/2022 20:54, Rob Clark wrote:
From: Rob Clark robdclark@chromium.org
Similar to AMD commit 874442541133 ("drm/amdgpu: Add show_fdinfo() interface"), using the infrastructure added in previous patches, we add basic client info and GPU engine utilisation for msm.
Example output:
# cat /proc/`pgrep glmark2`/fdinfo/6 pos: 0 flags: 02400002 mnt_id: 21 ino: 162 drm-driver: msm drm-client-id: 7 drm-engine-gpu: 1734371319 ns drm-cycles-gpu: 1153645024 drm-maxfreq-gpu: 800000000 Hz
See also: https://patchwork.freedesktop.org/patch/468505/
Signed-off-by: Rob Clark robdclark@chromium.org
Documentation/gpu/drm-usage-stats.rst | 21 +++++++++++++++++++++ drivers/gpu/drm/msm/msm_drv.c | 19 ++++++++++++++++++- drivers/gpu/drm/msm/msm_gpu.c | 21 +++++++++++++++++++-- drivers/gpu/drm/msm/msm_gpu.h | 19 +++++++++++++++++++ 4 files changed, 77 insertions(+), 3 deletions(-)
diff --git a/Documentation/gpu/drm-usage-stats.rst b/Documentation/gpu/drm-usage-stats.rst index 6c9f166a8d6f..60e5cc9c13ad 100644 --- a/Documentation/gpu/drm-usage-stats.rst +++ b/Documentation/gpu/drm-usage-stats.rst @@ -105,6 +105,27 @@ object belong to this client, in the respective memory region. Default unit shall be bytes with optional unit specifiers of 'KiB' or 'MiB' indicating kibi- or mebi-bytes.
+- drm-cycles-<str> <uint>
+Engine identifier string must be the same as the one specified in the +drm-engine-<str> tag and shall contain the number of busy cycles for the given +engine.
+Values are not required to be constantly monotonic if it makes the driver +implementation easier, but are required to catch up with the previously reported +larger value within a reasonable period. Upon observing a value lower than what +was previously read, userspace is expected to stay with that larger previous +value until a monotonic update is seen.
+- drm-maxfreq-<str> <uint> [Hz|MHz|KHz]
+Engine identifier string must be the same as the one specified in the +drm-engine-<str> tag and shall contain the maxium frequence for the given
maximum frequency
+engine. Taken together with drm-cycles-<str>, this can be used to calculate +percentage utilization of the engine, whereas drm-engine-<str> only refects
reflects
+time active without considering what frequency the engine is operating as a +percentage of it's maximum frequency.
Cycles vs max freq sounds very useful. My reservations is that how come the idea hasn't happened in the CPU world. Or maybe it has and I am un-informed?
In any case, if going with this I think we need to clarify the text that the value should reflect the current soft limit, where the driver supports that, in case it has been set to lower than the maximum frequency hardware can support. I am thinking about avoiding "my gpu cannot hit 100%" support incidents in cases when user/admin lowered the soft limit for some reason. Possibly does not apply to msm but can apply to i915, if we decided to export the same data.
No other gotchas come to mind at the moment.
Regards,
Tvrtko
- =============================== Driver specific implementations ===============================
diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index 14ab9a627d8b..57a66093e671 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -948,7 +948,24 @@ static const struct drm_ioctl_desc msm_ioctls[] = { DRM_IOCTL_DEF_DRV(MSM_SUBMITQUEUE_QUERY, msm_ioctl_submitqueue_query, DRM_RENDER_ALLOW), };
-DEFINE_DRM_GEM_FOPS(fops); +static void msm_fop_show_fdinfo(struct seq_file *m, struct file *f) +{
- struct drm_file *file = f->private_data;
- struct drm_device *dev = file->minor->dev;
- struct msm_drm_private *priv = dev->dev_private;
- struct drm_printer p = drm_seq_file_printer(m);
- if (!priv->gpu)
return;
- msm_gpu_show_fdinfo(priv->gpu, file->driver_priv, &p);
+}
+static const struct file_operations fops = {
- .owner = THIS_MODULE,
- DRM_GEM_FOPS,
- .show_fdinfo = msm_fop_show_fdinfo,
+};
static const struct drm_driver msm_driver = { .driver_features = DRIVER_GEM | diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index eb8a6663f309..333a9a299b41 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -4,6 +4,8 @@
- Author: Rob Clark robdclark@gmail.com
*/
+#include "drm/drm_drv.h"
- #include "msm_gpu.h" #include "msm_gem.h" #include "msm_mmu.h"
@@ -146,6 +148,16 @@ int msm_gpu_pm_suspend(struct msm_gpu *gpu) return 0; }
+void msm_gpu_show_fdinfo(struct msm_gpu *gpu, struct msm_file_private *ctx,
struct drm_printer *p)
+{
- drm_printf(p, "drm-driver:\t%s\n", gpu->dev->driver->name);
- drm_printf(p, "drm-client-id:\t%u\n", ctx->seqno);
- drm_printf(p, "drm-engine-gpu:\t%llu ns\n", ctx->elapsed_ns);
- drm_printf(p, "drm-cycles-gpu:\t%llu\n", ctx->cycles);
- drm_printf(p, "drm-maxfreq-gpu:\t%lu Hz\n", gpu->fast_rate);
+}
- int msm_gpu_hw_init(struct msm_gpu *gpu) { int ret;
@@ -652,7 +664,7 @@ static void retire_submit(struct msm_gpu *gpu, struct msm_ringbuffer *ring, { int index = submit->seqno % MSM_GPU_SUBMIT_STATS_COUNT; volatile struct msm_gpu_submit_stats *stats;
- u64 elapsed, clock = 0;
u64 elapsed, clock = 0, cycles; unsigned long flags;
stats = &ring->memptrs->stats[index];
@@ -660,12 +672,17 @@ static void retire_submit(struct msm_gpu *gpu, struct msm_ringbuffer *ring, elapsed = (stats->alwayson_end - stats->alwayson_start) * 10000; do_div(elapsed, 192);
- cycles = stats->cpcycles_end - stats->cpcycles_start;
- /* Calculate the clock frequency from the number of CP cycles */ if (elapsed) {
clock = (stats->cpcycles_end - stats->cpcycles_start) * 1000;
clock = cycles * 1000;
do_div(clock, elapsed); }
submit->queue->ctx->elapsed_ns += elapsed;
submit->queue->ctx->cycles += cycles;
trace_msm_gpu_submit_retired(submit, elapsed, clock, stats->alwayson_start, stats->alwayson_end);
diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h index 6def00883046..4911943ba53b 100644 --- a/drivers/gpu/drm/msm/msm_gpu.h +++ b/drivers/gpu/drm/msm/msm_gpu.h @@ -361,6 +361,22 @@ struct msm_file_private { /** cmdline: Overridden task cmdline, see MSM_PARAM_CMDLINE */ char *cmdline;
- /**
* elapsed:
*
* The total (cumulative) elapsed time GPU was busy with rendering
* from this context in ns.
*/
- uint64_t elapsed_ns;
- /**
* cycles:
*
* The total (cumulative) GPU cycles elapsed attributed to this
* context.
*/
- uint64_t cycles;
- /**
- entities:
@@ -544,6 +560,9 @@ static inline void gpu_write64(struct msm_gpu *gpu, u32 lo, u32 hi, u64 val) int msm_gpu_pm_suspend(struct msm_gpu *gpu); int msm_gpu_pm_resume(struct msm_gpu *gpu);
+void msm_gpu_show_fdinfo(struct msm_gpu *gpu, struct msm_file_private *ctx,
struct drm_printer *p);
- int msm_submitqueue_init(struct drm_device *drm, struct msm_file_private *ctx); struct msm_gpu_submitqueue *msm_submitqueue_get(struct msm_file_private *ctx, u32 id);
On Tue, Jun 7, 2022 at 1:56 AM Tvrtko Ursulin tvrtko.ursulin@linux.intel.com wrote:
On 06/06/2022 20:54, Rob Clark wrote:
From: Rob Clark robdclark@chromium.org
Similar to AMD commit 874442541133 ("drm/amdgpu: Add show_fdinfo() interface"), using the infrastructure added in previous patches, we add basic client info and GPU engine utilisation for msm.
Example output:
# cat /proc/`pgrep glmark2`/fdinfo/6 pos: 0 flags: 02400002 mnt_id: 21 ino: 162 drm-driver: msm drm-client-id: 7 drm-engine-gpu: 1734371319 ns drm-cycles-gpu: 1153645024 drm-maxfreq-gpu: 800000000 Hz
See also: https://patchwork.freedesktop.org/patch/468505/
Signed-off-by: Rob Clark robdclark@chromium.org
Documentation/gpu/drm-usage-stats.rst | 21 +++++++++++++++++++++ drivers/gpu/drm/msm/msm_drv.c | 19 ++++++++++++++++++- drivers/gpu/drm/msm/msm_gpu.c | 21 +++++++++++++++++++-- drivers/gpu/drm/msm/msm_gpu.h | 19 +++++++++++++++++++ 4 files changed, 77 insertions(+), 3 deletions(-)
diff --git a/Documentation/gpu/drm-usage-stats.rst b/Documentation/gpu/drm-usage-stats.rst index 6c9f166a8d6f..60e5cc9c13ad 100644 --- a/Documentation/gpu/drm-usage-stats.rst +++ b/Documentation/gpu/drm-usage-stats.rst @@ -105,6 +105,27 @@ object belong to this client, in the respective memory region. Default unit shall be bytes with optional unit specifiers of 'KiB' or 'MiB' indicating kibi- or mebi-bytes.
+- drm-cycles-<str> <uint>
+Engine identifier string must be the same as the one specified in the +drm-engine-<str> tag and shall contain the number of busy cycles for the given +engine.
+Values are not required to be constantly monotonic if it makes the driver +implementation easier, but are required to catch up with the previously reported +larger value within a reasonable period. Upon observing a value lower than what +was previously read, userspace is expected to stay with that larger previous +value until a monotonic update is seen.
+- drm-maxfreq-<str> <uint> [Hz|MHz|KHz]
+Engine identifier string must be the same as the one specified in the +drm-engine-<str> tag and shall contain the maxium frequence for the given
maximum frequency
+engine. Taken together with drm-cycles-<str>, this can be used to calculate +percentage utilization of the engine, whereas drm-engine-<str> only refects
reflects
+time active without considering what frequency the engine is operating as a +percentage of it's maximum frequency.
Cycles vs max freq sounds very useful. My reservations is that how come the idea hasn't happened in the CPU world. Or maybe it has and I am un-informed?
I do often pay attention to both where tasks get scheduled, and the individual CPU freq when I'm profiling CPU side stuff (eg. in perfetto)
I could also report "always-count" cycles, I think, which could be used by gputop to derive freq. I'd have to think about that a bit, since keeping the result monotinic(ish) might be a bit tricky (the hw counter loses state across runtime suspend)
In any case, if going with this I think we need to clarify the text that the value should reflect the current soft limit, where the driver supports that, in case it has been set to lower than the maximum frequency hardware can support. I am thinking about avoiding "my gpu cannot hit 100%" support incidents in cases when user/admin lowered the soft limit for some reason. Possibly does not apply to msm but can apply to i915, if we decided to export the same data.
Yes, with pm-qos thermal or userspace could limit the max freq.. but we also internally use a pm-qos constraint to reduce freq when the GPU is idle, and I don't think there is a good way to differentiate *which* constraint is which. I'll add something involving the word "recommended" ;-)
BR, -R
No other gotchas come to mind at the moment.
Regards,
Tvrtko
- =============================== Driver specific implementations ===============================
diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index 14ab9a627d8b..57a66093e671 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -948,7 +948,24 @@ static const struct drm_ioctl_desc msm_ioctls[] = { DRM_IOCTL_DEF_DRV(MSM_SUBMITQUEUE_QUERY, msm_ioctl_submitqueue_query, DRM_RENDER_ALLOW), };
-DEFINE_DRM_GEM_FOPS(fops); +static void msm_fop_show_fdinfo(struct seq_file *m, struct file *f) +{
struct drm_file *file = f->private_data;
struct drm_device *dev = file->minor->dev;
struct msm_drm_private *priv = dev->dev_private;
struct drm_printer p = drm_seq_file_printer(m);
if (!priv->gpu)
return;
msm_gpu_show_fdinfo(priv->gpu, file->driver_priv, &p);
+}
+static const struct file_operations fops = {
.owner = THIS_MODULE,
DRM_GEM_FOPS,
.show_fdinfo = msm_fop_show_fdinfo,
+};
static const struct drm_driver msm_driver = { .driver_features = DRIVER_GEM | diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index eb8a6663f309..333a9a299b41 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -4,6 +4,8 @@
- Author: Rob Clark robdclark@gmail.com
*/
+#include "drm/drm_drv.h"
- #include "msm_gpu.h" #include "msm_gem.h" #include "msm_mmu.h"
@@ -146,6 +148,16 @@ int msm_gpu_pm_suspend(struct msm_gpu *gpu) return 0; }
+void msm_gpu_show_fdinfo(struct msm_gpu *gpu, struct msm_file_private *ctx,
struct drm_printer *p)
+{
drm_printf(p, "drm-driver:\t%s\n", gpu->dev->driver->name);
drm_printf(p, "drm-client-id:\t%u\n", ctx->seqno);
drm_printf(p, "drm-engine-gpu:\t%llu ns\n", ctx->elapsed_ns);
drm_printf(p, "drm-cycles-gpu:\t%llu\n", ctx->cycles);
drm_printf(p, "drm-maxfreq-gpu:\t%lu Hz\n", gpu->fast_rate);
+}
- int msm_gpu_hw_init(struct msm_gpu *gpu) { int ret;
@@ -652,7 +664,7 @@ static void retire_submit(struct msm_gpu *gpu, struct msm_ringbuffer *ring, { int index = submit->seqno % MSM_GPU_SUBMIT_STATS_COUNT; volatile struct msm_gpu_submit_stats *stats;
u64 elapsed, clock = 0;
u64 elapsed, clock = 0, cycles; unsigned long flags; stats = &ring->memptrs->stats[index];
@@ -660,12 +672,17 @@ static void retire_submit(struct msm_gpu *gpu, struct msm_ringbuffer *ring, elapsed = (stats->alwayson_end - stats->alwayson_start) * 10000; do_div(elapsed, 192);
cycles = stats->cpcycles_end - stats->cpcycles_start;
/* Calculate the clock frequency from the number of CP cycles */ if (elapsed) {
clock = (stats->cpcycles_end - stats->cpcycles_start) * 1000;
clock = cycles * 1000; do_div(clock, elapsed); }
submit->queue->ctx->elapsed_ns += elapsed;
submit->queue->ctx->cycles += cycles;
trace_msm_gpu_submit_retired(submit, elapsed, clock, stats->alwayson_start, stats->alwayson_end);
diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h index 6def00883046..4911943ba53b 100644 --- a/drivers/gpu/drm/msm/msm_gpu.h +++ b/drivers/gpu/drm/msm/msm_gpu.h @@ -361,6 +361,22 @@ struct msm_file_private { /** cmdline: Overridden task cmdline, see MSM_PARAM_CMDLINE */ char *cmdline;
/**
* elapsed:
*
* The total (cumulative) elapsed time GPU was busy with rendering
* from this context in ns.
*/
uint64_t elapsed_ns;
/**
* cycles:
*
* The total (cumulative) GPU cycles elapsed attributed to this
* context.
*/
uint64_t cycles;
/** * entities: *
@@ -544,6 +560,9 @@ static inline void gpu_write64(struct msm_gpu *gpu, u32 lo, u32 hi, u64 val) int msm_gpu_pm_suspend(struct msm_gpu *gpu); int msm_gpu_pm_resume(struct msm_gpu *gpu);
+void msm_gpu_show_fdinfo(struct msm_gpu *gpu, struct msm_file_private *ctx,
struct drm_printer *p);
- int msm_submitqueue_init(struct drm_device *drm, struct msm_file_private *ctx); struct msm_gpu_submitqueue *msm_submitqueue_get(struct msm_file_private *ctx, u32 id);
On Tue, Jun 7, 2022 at 9:02 AM Rob Clark robdclark@gmail.com wrote:
On Tue, Jun 7, 2022 at 1:56 AM Tvrtko Ursulin tvrtko.ursulin@linux.intel.com wrote:
On 06/06/2022 20:54, Rob Clark wrote:
From: Rob Clark robdclark@chromium.org
Similar to AMD commit 874442541133 ("drm/amdgpu: Add show_fdinfo() interface"), using the infrastructure added in previous patches, we add basic client info and GPU engine utilisation for msm.
Example output:
# cat /proc/`pgrep glmark2`/fdinfo/6 pos: 0 flags: 02400002 mnt_id: 21 ino: 162 drm-driver: msm drm-client-id: 7 drm-engine-gpu: 1734371319 ns drm-cycles-gpu: 1153645024 drm-maxfreq-gpu: 800000000 Hz
See also: https://patchwork.freedesktop.org/patch/468505/
Signed-off-by: Rob Clark robdclark@chromium.org
Documentation/gpu/drm-usage-stats.rst | 21 +++++++++++++++++++++ drivers/gpu/drm/msm/msm_drv.c | 19 ++++++++++++++++++- drivers/gpu/drm/msm/msm_gpu.c | 21 +++++++++++++++++++-- drivers/gpu/drm/msm/msm_gpu.h | 19 +++++++++++++++++++ 4 files changed, 77 insertions(+), 3 deletions(-)
diff --git a/Documentation/gpu/drm-usage-stats.rst b/Documentation/gpu/drm-usage-stats.rst index 6c9f166a8d6f..60e5cc9c13ad 100644 --- a/Documentation/gpu/drm-usage-stats.rst +++ b/Documentation/gpu/drm-usage-stats.rst @@ -105,6 +105,27 @@ object belong to this client, in the respective memory region. Default unit shall be bytes with optional unit specifiers of 'KiB' or 'MiB' indicating kibi- or mebi-bytes.
+- drm-cycles-<str> <uint>
+Engine identifier string must be the same as the one specified in the +drm-engine-<str> tag and shall contain the number of busy cycles for the given +engine.
+Values are not required to be constantly monotonic if it makes the driver +implementation easier, but are required to catch up with the previously reported +larger value within a reasonable period. Upon observing a value lower than what +was previously read, userspace is expected to stay with that larger previous +value until a monotonic update is seen.
+- drm-maxfreq-<str> <uint> [Hz|MHz|KHz]
+Engine identifier string must be the same as the one specified in the +drm-engine-<str> tag and shall contain the maxium frequence for the given
maximum frequency
+engine. Taken together with drm-cycles-<str>, this can be used to calculate +percentage utilization of the engine, whereas drm-engine-<str> only refects
reflects
+time active without considering what frequency the engine is operating as a +percentage of it's maximum frequency.
Cycles vs max freq sounds very useful. My reservations is that how come the idea hasn't happened in the CPU world. Or maybe it has and I am un-informed?
I do often pay attention to both where tasks get scheduled, and the individual CPU freq when I'm profiling CPU side stuff (eg. in perfetto)
I could also report "always-count" cycles, I think, which could be used by gputop to derive freq. I'd have to think about that a bit, since keeping the result monotinic(ish) might be a bit tricky (the hw counter loses state across runtime suspend)
In any case, if going with this I think we need to clarify the text that the value should reflect the current soft limit, where the driver supports that, in case it has been set to lower than the maximum frequency hardware can support. I am thinking about avoiding "my gpu cannot hit 100%" support incidents in cases when user/admin lowered the soft limit for some reason. Possibly does not apply to msm but can apply to i915, if we decided to export the same data.
Yes, with pm-qos thermal or userspace could limit the max freq.. but we also internally use a pm-qos constraint to reduce freq when the GPU is idle, and I don't think there is a good way to differentiate *which* constraint is which. I'll add something involving the word "recommended" ;-)
Hmm, or on second thought, maybe it would be better to, for drivers that can, just report the soft limit separately?
BR, -R
BR, -R
No other gotchas come to mind at the moment.
Regards,
Tvrtko
- =============================== Driver specific implementations ===============================
diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index 14ab9a627d8b..57a66093e671 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -948,7 +948,24 @@ static const struct drm_ioctl_desc msm_ioctls[] = { DRM_IOCTL_DEF_DRV(MSM_SUBMITQUEUE_QUERY, msm_ioctl_submitqueue_query, DRM_RENDER_ALLOW), };
-DEFINE_DRM_GEM_FOPS(fops); +static void msm_fop_show_fdinfo(struct seq_file *m, struct file *f) +{
struct drm_file *file = f->private_data;
struct drm_device *dev = file->minor->dev;
struct msm_drm_private *priv = dev->dev_private;
struct drm_printer p = drm_seq_file_printer(m);
if (!priv->gpu)
return;
msm_gpu_show_fdinfo(priv->gpu, file->driver_priv, &p);
+}
+static const struct file_operations fops = {
.owner = THIS_MODULE,
DRM_GEM_FOPS,
.show_fdinfo = msm_fop_show_fdinfo,
+};
static const struct drm_driver msm_driver = { .driver_features = DRIVER_GEM | diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index eb8a6663f309..333a9a299b41 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -4,6 +4,8 @@
- Author: Rob Clark robdclark@gmail.com
*/
+#include "drm/drm_drv.h"
- #include "msm_gpu.h" #include "msm_gem.h" #include "msm_mmu.h"
@@ -146,6 +148,16 @@ int msm_gpu_pm_suspend(struct msm_gpu *gpu) return 0; }
+void msm_gpu_show_fdinfo(struct msm_gpu *gpu, struct msm_file_private *ctx,
struct drm_printer *p)
+{
drm_printf(p, "drm-driver:\t%s\n", gpu->dev->driver->name);
drm_printf(p, "drm-client-id:\t%u\n", ctx->seqno);
drm_printf(p, "drm-engine-gpu:\t%llu ns\n", ctx->elapsed_ns);
drm_printf(p, "drm-cycles-gpu:\t%llu\n", ctx->cycles);
drm_printf(p, "drm-maxfreq-gpu:\t%lu Hz\n", gpu->fast_rate);
+}
- int msm_gpu_hw_init(struct msm_gpu *gpu) { int ret;
@@ -652,7 +664,7 @@ static void retire_submit(struct msm_gpu *gpu, struct msm_ringbuffer *ring, { int index = submit->seqno % MSM_GPU_SUBMIT_STATS_COUNT; volatile struct msm_gpu_submit_stats *stats;
u64 elapsed, clock = 0;
u64 elapsed, clock = 0, cycles; unsigned long flags; stats = &ring->memptrs->stats[index];
@@ -660,12 +672,17 @@ static void retire_submit(struct msm_gpu *gpu, struct msm_ringbuffer *ring, elapsed = (stats->alwayson_end - stats->alwayson_start) * 10000; do_div(elapsed, 192);
cycles = stats->cpcycles_end - stats->cpcycles_start;
/* Calculate the clock frequency from the number of CP cycles */ if (elapsed) {
clock = (stats->cpcycles_end - stats->cpcycles_start) * 1000;
clock = cycles * 1000; do_div(clock, elapsed); }
submit->queue->ctx->elapsed_ns += elapsed;
submit->queue->ctx->cycles += cycles;
trace_msm_gpu_submit_retired(submit, elapsed, clock, stats->alwayson_start, stats->alwayson_end);
diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h index 6def00883046..4911943ba53b 100644 --- a/drivers/gpu/drm/msm/msm_gpu.h +++ b/drivers/gpu/drm/msm/msm_gpu.h @@ -361,6 +361,22 @@ struct msm_file_private { /** cmdline: Overridden task cmdline, see MSM_PARAM_CMDLINE */ char *cmdline;
/**
* elapsed:
*
* The total (cumulative) elapsed time GPU was busy with rendering
* from this context in ns.
*/
uint64_t elapsed_ns;
/**
* cycles:
*
* The total (cumulative) GPU cycles elapsed attributed to this
* context.
*/
uint64_t cycles;
/** * entities: *
@@ -544,6 +560,9 @@ static inline void gpu_write64(struct msm_gpu *gpu, u32 lo, u32 hi, u64 val) int msm_gpu_pm_suspend(struct msm_gpu *gpu); int msm_gpu_pm_resume(struct msm_gpu *gpu);
+void msm_gpu_show_fdinfo(struct msm_gpu *gpu, struct msm_file_private *ctx,
struct drm_printer *p);
- int msm_submitqueue_init(struct drm_device *drm, struct msm_file_private *ctx); struct msm_gpu_submitqueue *msm_submitqueue_get(struct msm_file_private *ctx, u32 id);
On 07/06/2022 17:08, Rob Clark wrote:
On Tue, Jun 7, 2022 at 9:02 AM Rob Clark robdclark@gmail.com wrote:
On Tue, Jun 7, 2022 at 1:56 AM Tvrtko Ursulin tvrtko.ursulin@linux.intel.com wrote:
On 06/06/2022 20:54, Rob Clark wrote:
From: Rob Clark robdclark@chromium.org
Similar to AMD commit 874442541133 ("drm/amdgpu: Add show_fdinfo() interface"), using the infrastructure added in previous patches, we add basic client info and GPU engine utilisation for msm.
Example output:
# cat /proc/`pgrep glmark2`/fdinfo/6 pos: 0 flags: 02400002 mnt_id: 21 ino: 162 drm-driver: msm drm-client-id: 7 drm-engine-gpu: 1734371319 ns drm-cycles-gpu: 1153645024 drm-maxfreq-gpu: 800000000 Hz
See also: https://patchwork.freedesktop.org/patch/468505/
Signed-off-by: Rob Clark robdclark@chromium.org
Documentation/gpu/drm-usage-stats.rst | 21 +++++++++++++++++++++ drivers/gpu/drm/msm/msm_drv.c | 19 ++++++++++++++++++- drivers/gpu/drm/msm/msm_gpu.c | 21 +++++++++++++++++++-- drivers/gpu/drm/msm/msm_gpu.h | 19 +++++++++++++++++++ 4 files changed, 77 insertions(+), 3 deletions(-)
diff --git a/Documentation/gpu/drm-usage-stats.rst b/Documentation/gpu/drm-usage-stats.rst index 6c9f166a8d6f..60e5cc9c13ad 100644 --- a/Documentation/gpu/drm-usage-stats.rst +++ b/Documentation/gpu/drm-usage-stats.rst @@ -105,6 +105,27 @@ object belong to this client, in the respective memory region. Default unit shall be bytes with optional unit specifiers of 'KiB' or 'MiB' indicating kibi- or mebi-bytes.
+- drm-cycles-<str> <uint>
+Engine identifier string must be the same as the one specified in the +drm-engine-<str> tag and shall contain the number of busy cycles for the given +engine.
+Values are not required to be constantly monotonic if it makes the driver +implementation easier, but are required to catch up with the previously reported +larger value within a reasonable period. Upon observing a value lower than what +was previously read, userspace is expected to stay with that larger previous +value until a monotonic update is seen.
+- drm-maxfreq-<str> <uint> [Hz|MHz|KHz]
+Engine identifier string must be the same as the one specified in the +drm-engine-<str> tag and shall contain the maxium frequence for the given
maximum frequency
+engine. Taken together with drm-cycles-<str>, this can be used to calculate +percentage utilization of the engine, whereas drm-engine-<str> only refects
reflects
+time active without considering what frequency the engine is operating as a +percentage of it's maximum frequency.
Cycles vs max freq sounds very useful. My reservations is that how come the idea hasn't happened in the CPU world. Or maybe it has and I am un-informed?
I do often pay attention to both where tasks get scheduled, and the individual CPU freq when I'm profiling CPU side stuff (eg. in perfetto)
I could also report "always-count" cycles, I think, which could be used by gputop to derive freq. I'd have to think about that a bit, since keeping the result monotinic(ish) might be a bit tricky (the hw counter loses state across runtime suspend)
In any case, if going with this I think we need to clarify the text that the value should reflect the current soft limit, where the driver supports that, in case it has been set to lower than the maximum frequency hardware can support. I am thinking about avoiding "my gpu cannot hit 100%" support incidents in cases when user/admin lowered the soft limit for some reason. Possibly does not apply to msm but can apply to i915, if we decided to export the same data.
Yes, with pm-qos thermal or userspace could limit the max freq.. but we also internally use a pm-qos constraint to reduce freq when the GPU is idle, and I don't think there is a good way to differentiate *which* constraint is which. I'll add something involving the word "recommended" ;-)
Hmm, or on second thought, maybe it would be better to, for drivers that can, just report the soft limit separately?
Yes. I realized later soft-limit does not work, anything reported here has to be invariant otherwise userspace cannot make sense of the accumulated cycles vs changing freq. Max freq, as long as it is truly max, as you were proposing, works I think.
Regards,
Tvrtko
Hi
Am 06.06.22 um 21:54 schrieb Rob Clark:
From: Rob Clark robdclark@chromium.org
The DEFINE_DRM_GEM_FOPS() helper is a bit limiting if a driver wants to provide additional file ops, like show_fdinfo().
Signed-off-by: Rob Clark robdclark@chromium.org
include/drm/drm_gem.h | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-)
diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h index 9d7c61a122dc..dc88d4a2cdf6 100644 --- a/include/drm/drm_gem.h +++ b/include/drm/drm_gem.h @@ -314,6 +314,23 @@ struct drm_gem_object { const struct drm_gem_object_funcs *funcs; };
+/**
- DRM_GEM_FOPS - Default drm GEM file operations
- This macro provides a shorthand for setting the GEM file ops in the
- &file_operations structure.
I would appreciate a reference to DEFINE_DRM_GEM_FOPS. Something along the lines of 'if all you need are the default ops, use DEFINE_DRM_GEM_FOPS'.
- */
+#define DRM_GEM_FOPS \
- .open = drm_open,\
- .release = drm_release,\
- .unlocked_ioctl = drm_ioctl,\
- .compat_ioctl = drm_compat_ioctl,\
- .poll = drm_poll,\
- .read = drm_read,\
- .llseek = noop_llseek,\
- .mmap = drm_gem_mmap
Only one empty line please.
/**
- DEFINE_DRM_GEM_FOPS() - macro to generate file operations for GEM drivers
- @name: name for the generated structure
@@ -330,14 +347,7 @@ struct drm_gem_object { #define DEFINE_DRM_GEM_FOPS(name) \ static const struct file_operations name = {\ .owner = THIS_MODULE,\
Is there a specific reason why .owner is still set here? I suspect that DRM_GEM_FOPS is strictly for callback functions?
In any case
Acked-by: Thomas Zimmermann tzimmermann@suse.de
Best regards Thomas
.open = drm_open,\
.release = drm_release,\
.unlocked_ioctl = drm_ioctl,\
.compat_ioctl = drm_compat_ioctl,\
.poll = drm_poll,\
.read = drm_read,\
.llseek = noop_llseek,\
.mmap = drm_gem_mmap,\
DRM_GEM_FOPS,\
}
void drm_gem_object_release(struct drm_gem_object *obj);
On Mon, Jun 6, 2022 at 11:56 PM Thomas Zimmermann tzimmermann@suse.de wrote:
Hi
Am 06.06.22 um 21:54 schrieb Rob Clark:
From: Rob Clark robdclark@chromium.org
The DEFINE_DRM_GEM_FOPS() helper is a bit limiting if a driver wants to provide additional file ops, like show_fdinfo().
Signed-off-by: Rob Clark robdclark@chromium.org
include/drm/drm_gem.h | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-)
diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h index 9d7c61a122dc..dc88d4a2cdf6 100644 --- a/include/drm/drm_gem.h +++ b/include/drm/drm_gem.h @@ -314,6 +314,23 @@ struct drm_gem_object { const struct drm_gem_object_funcs *funcs; };
+/**
- DRM_GEM_FOPS - Default drm GEM file operations
- This macro provides a shorthand for setting the GEM file ops in the
- &file_operations structure.
I would appreciate a reference to DEFINE_DRM_GEM_FOPS. Something along the lines of 'if all you need are the default ops, use DEFINE_DRM_GEM_FOPS'.
- */
+#define DRM_GEM_FOPS \
.open = drm_open,\
.release = drm_release,\
.unlocked_ioctl = drm_ioctl,\
.compat_ioctl = drm_compat_ioctl,\
.poll = drm_poll,\
.read = drm_read,\
.llseek = noop_llseek,\
.mmap = drm_gem_mmap
Only one empty line please.
/**
- DEFINE_DRM_GEM_FOPS() - macro to generate file operations for GEM drivers
- @name: name for the generated structure
@@ -330,14 +347,7 @@ struct drm_gem_object { #define DEFINE_DRM_GEM_FOPS(name) \ static const struct file_operations name = {\ .owner = THIS_MODULE,\
Is there a specific reason why .owner is still set here? I suspect that DRM_GEM_FOPS is strictly for callback functions?
I was on the fence about that one, but it seemed better to not mix "magic" and the callbacks.. but I could be convinced in either direction
In any case
Acked-by: Thomas Zimmermann tzimmermann@suse.de
thx, I'll fixup the other nits in v3.
Best regards Thomas
.open = drm_open,\
.release = drm_release,\
.unlocked_ioctl = drm_ioctl,\
.compat_ioctl = drm_compat_ioctl,\
.poll = drm_poll,\
.read = drm_read,\
.llseek = noop_llseek,\
.mmap = drm_gem_mmap,\
DRM_GEM_FOPS,\ }
void drm_gem_object_release(struct drm_gem_object *obj);
-- Thomas Zimmermann Graphics Driver Developer SUSE Software Solutions Germany GmbH Maxfeldstr. 5, 90409 Nürnberg, Germany (HRB 36809, AG Nürnberg) Geschäftsführer: Ivo Totev
Hi
Am 07.06.22 um 16:58 schrieb Rob Clark:
On Mon, Jun 6, 2022 at 11:56 PM Thomas Zimmermann tzimmermann@suse.de wrote:
Hi
Am 06.06.22 um 21:54 schrieb Rob Clark:
From: Rob Clark robdclark@chromium.org
The DEFINE_DRM_GEM_FOPS() helper is a bit limiting if a driver wants to provide additional file ops, like show_fdinfo().
Signed-off-by: Rob Clark robdclark@chromium.org
include/drm/drm_gem.h | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-)
diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h index 9d7c61a122dc..dc88d4a2cdf6 100644 --- a/include/drm/drm_gem.h +++ b/include/drm/drm_gem.h @@ -314,6 +314,23 @@ struct drm_gem_object { const struct drm_gem_object_funcs *funcs; };
+/**
- DRM_GEM_FOPS - Default drm GEM file operations
- This macro provides a shorthand for setting the GEM file ops in the
- &file_operations structure.
I would appreciate a reference to DEFINE_DRM_GEM_FOPS. Something along the lines of 'if all you need are the default ops, use DEFINE_DRM_GEM_FOPS'.
- */
+#define DRM_GEM_FOPS \
.open = drm_open,\
.release = drm_release,\
.unlocked_ioctl = drm_ioctl,\
.compat_ioctl = drm_compat_ioctl,\
.poll = drm_poll,\
.read = drm_read,\
.llseek = noop_llseek,\
.mmap = drm_gem_mmap
Only one empty line please.
/** * DEFINE_DRM_GEM_FOPS() - macro to generate file operations for GEM drivers * @name: name for the generated structure @@ -330,14 +347,7 @@ struct drm_gem_object { #define DEFINE_DRM_GEM_FOPS(name) \ static const struct file_operations name = {\ .owner = THIS_MODULE,\
Is there a specific reason why .owner is still set here? I suspect that DRM_GEM_FOPS is strictly for callback functions?
I was on the fence about that one, but it seemed better to not mix "magic" and the callbacks.. but I could be convinced in either direction
I think you made the right choice. It's cleaner and most drivers will want to use DEFINE_DRM_GEM_FOPS, which includes the magic.
Best regards Thomas
In any case
Acked-by: Thomas Zimmermann tzimmermann@suse.de
thx, I'll fixup the other nits in v3.
Best regards Thomas
.open = drm_open,\
.release = drm_release,\
.unlocked_ioctl = drm_ioctl,\
.compat_ioctl = drm_compat_ioctl,\
.poll = drm_poll,\
.read = drm_read,\
.llseek = noop_llseek,\
.mmap = drm_gem_mmap,\
DRM_GEM_FOPS,\ }
void drm_gem_object_release(struct drm_gem_object *obj);
-- Thomas Zimmermann Graphics Driver Developer SUSE Software Solutions Germany GmbH Maxfeldstr. 5, 90409 Nürnberg, Germany (HRB 36809, AG Nürnberg) Geschäftsführer: Ivo Totev
dri-devel@lists.freedesktop.org