This patch series adds support for loadavg values for GPU sub-components. I am adding an SMA algorithm as I was not really sure if EWMA would be a good fit for this use case.
Christian Gmeiner (4): drm/etnaviv: add simple moving average (SMA) drm/etnaviv: add loadavg accounting drm/etnaviv: show loadavg in debugfs drm/etnaviv: export loadavg via perfmon
drivers/gpu/drm/etnaviv/etnaviv_drv.c | 14 ++++ drivers/gpu/drm/etnaviv/etnaviv_gpu.c | 44 ++++++++++++- drivers/gpu/drm/etnaviv/etnaviv_gpu.h | 29 +++++++++ drivers/gpu/drm/etnaviv/etnaviv_perfmon.c | 79 +++++++++++++++++++++++ drivers/gpu/drm/etnaviv/etnaviv_sma.h | 53 +++++++++++++++ 5 files changed, 218 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/etnaviv/etnaviv_sma.h
This adds an SMA algorithm inspired by the exponentially weighted moving average (EWMA) algorithm found in the kernel.
Signed-off-by: Christian Gmeiner christian.gmeiner@gmail.com --- drivers/gpu/drm/etnaviv/etnaviv_sma.h | 53 +++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 drivers/gpu/drm/etnaviv/etnaviv_sma.h
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sma.h b/drivers/gpu/drm/etnaviv/etnaviv_sma.h new file mode 100644 index 000000000000..81564d5cbdc3 --- /dev/null +++ b/drivers/gpu/drm/etnaviv/etnaviv_sma.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Etnaviv Project + */ + +#ifndef __ETNAVIV_SMA_H__ +#define __ETNAVIV_SMA_H__ + +#include <linux/bug.h> +#include <linux/compiler.h> + +/* + * Simple moving average (SMA) + * + * This implements a fixed-size SMA algorithm. + * + * The first argument to the macro is the name that will be used + * for the struct and helper functions. + * + * The second argument, the samples, expresses how many samples are + * used for the SMA algorithm. + */ + +#define DECLARE_SMA(name, _samples) \ + struct sma_##name { \ + unsigned long pos; \ + unsigned long sum; \ + unsigned long samples[_samples]; \ + }; \ + static inline void sma_##name##_init(struct sma_##name *s) \ + { \ + BUILD_BUG_ON(!__builtin_constant_p(_samples)); \ + memset(s, 0, sizeof(struct sma_##name)); \ + } \ + static inline unsigned long sma_##name##_read(struct sma_##name *s) \ + { \ + BUILD_BUG_ON(!__builtin_constant_p(_samples)); \ + return s->sum / _samples; \ + } \ + static inline void sma_##name##_add(struct sma_##name *s, unsigned long val) \ + { \ + unsigned long pos = READ_ONCE(s->pos); \ + unsigned long sum = READ_ONCE(s->sum); \ + unsigned long sample = READ_ONCE(s->samples[pos]); \ + \ + BUILD_BUG_ON(!__builtin_constant_p(_samples)); \ + \ + WRITE_ONCE(s->sum, sum - sample + val); \ + WRITE_ONCE(s->samples[pos], val); \ + WRITE_ONCE(s->pos, pos + 1 == _samples ? 0 : pos + 1); \ + } + +#endif /* __ETNAVIV_SMA_H__ */
The GPU has an idle state register where each bit represents the idle state of a sub-GPU component like FE or TX. Sample this register every 10ms and calculate a simple moving average over the sub-GPU component idle states with a total observation time frame of 1s.
This provides us with a percentage-based load of each sub-GPU component.
Signed-off-by: Christian Gmeiner christian.gmeiner@gmail.com --- drivers/gpu/drm/etnaviv/etnaviv_drv.c | 14 ++++++++++++ drivers/gpu/drm/etnaviv/etnaviv_gpu.c | 32 +++++++++++++++++++++++++++ drivers/gpu/drm/etnaviv/etnaviv_gpu.h | 29 ++++++++++++++++++++++++ 3 files changed, 75 insertions(+)
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.c b/drivers/gpu/drm/etnaviv/etnaviv_drv.c index f9afe11c50f0..b31920241c86 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_drv.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.c @@ -46,6 +46,19 @@ static void load_gpu(struct drm_device *dev) } }
+static void unload_gpu(struct drm_device *dev) +{ + struct etnaviv_drm_private *priv = dev->dev_private; + unsigned int i; + + for (i = 0; i < ETNA_MAX_PIPES; i++) { + struct etnaviv_gpu *g = priv->gpu[i]; + + if (g) + etnaviv_gpu_shutdown(g); + } +} + static int etnaviv_open(struct drm_device *dev, struct drm_file *file) { struct etnaviv_drm_private *priv = dev->dev_private; @@ -581,6 +594,7 @@ static void etnaviv_unbind(struct device *dev) struct drm_device *drm = dev_get_drvdata(dev); struct etnaviv_drm_private *priv = drm->dev_private;
+ unload_gpu(drm); drm_dev_unregister(drm);
component_unbind_all(dev, drm); diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c index a31eeff2b297..1f0eb7e00657 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c @@ -714,6 +714,28 @@ static void etnaviv_gpu_hw_init(struct etnaviv_gpu *gpu) gpu_write(gpu, VIVS_HI_INTR_ENBL, ~0U); }
+static void etnaviv_loadavg_function(struct timer_list *t) +{ + struct etnaviv_gpu *gpu = from_timer(gpu, t, loadavg_timer); + const u32 idle = gpu_read(gpu, VIVS_HI_IDLE_STATE); + int i; + + for (i = 0; i < ARRAY_SIZE(etna_idle_module_names); i++) + if ((idle & etna_idle_module_names[i].bit)) + sma_loadavg_add(&gpu->loadavg_value[i], 0); + else + sma_loadavg_add(&gpu->loadavg_value[i], 100); + + spin_lock_bh(&gpu->loadavg_spinlock); + + for (i = 0; i < ARRAY_SIZE(etna_idle_module_names); i++) + gpu->loadavg_percentage[i] = sma_loadavg_read(&gpu->loadavg_value[i]); + + spin_unlock_bh(&gpu->loadavg_spinlock); + + mod_timer(t, jiffies + msecs_to_jiffies(10)); +} + int etnaviv_gpu_init(struct etnaviv_gpu *gpu) { struct etnaviv_drm_private *priv = gpu->drm->dev_private; @@ -804,6 +826,10 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu) for (i = 0; i < ARRAY_SIZE(gpu->event); i++) complete(&gpu->event_free);
+ /* Setup loadavg timer */ + timer_setup(&gpu->loadavg_timer, etnaviv_loadavg_function, 0); + mod_timer(&gpu->loadavg_timer, jiffies + msecs_to_jiffies(10)); + /* Now program the hardware */ mutex_lock(&gpu->lock); etnaviv_gpu_hw_init(gpu); @@ -824,6 +850,11 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu) return ret; }
+void etnaviv_gpu_shutdown(struct etnaviv_gpu *gpu) +{ + del_timer(&gpu->loadavg_timer); +} + #ifdef CONFIG_DEBUG_FS struct dma_debug { u32 address[2]; @@ -1762,6 +1793,7 @@ static int etnaviv_gpu_platform_probe(struct platform_device *pdev) gpu->dev = &pdev->dev; mutex_init(&gpu->lock); mutex_init(&gpu->fence_lock); + spin_lock_init(&gpu->loadavg_spinlock);
/* Map registers: */ gpu->mmio = devm_platform_ioremap_resource(pdev, 0); diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h index 8ea48697d132..a5b9c89c6744 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h @@ -10,6 +10,8 @@ #include "etnaviv_gem.h" #include "etnaviv_mmu.h" #include "etnaviv_drv.h" +#include "etnaviv_sma.h" +#include "state_hi.xml.h"
struct etnaviv_gem_submit; struct etnaviv_vram_mapping; @@ -91,6 +93,26 @@ struct clk;
#define ETNA_NR_EVENTS 30
+DECLARE_SMA(loadavg, 100) + +static const struct { + const char *name; + u32 bit; +} etna_idle_module_names[] = { + { "FE", VIVS_HI_IDLE_STATE_FE }, + { "DE", VIVS_HI_IDLE_STATE_DE }, + { "PE", VIVS_HI_IDLE_STATE_PE }, + { "SH", VIVS_HI_IDLE_STATE_SH }, + { "PA", VIVS_HI_IDLE_STATE_PA }, + { "SE", VIVS_HI_IDLE_STATE_SE }, + { "RA", VIVS_HI_IDLE_STATE_RA }, + { "TX", VIVS_HI_IDLE_STATE_TX }, + { "VG", VIVS_HI_IDLE_STATE_VG }, + { "IM", VIVS_HI_IDLE_STATE_IM }, + { "FP", VIVS_HI_IDLE_STATE_FP }, + { "TS", VIVS_HI_IDLE_STATE_TS }, +}; + struct etnaviv_gpu { struct drm_device *drm; struct thermal_cooling_device *cooling; @@ -145,6 +167,12 @@ struct etnaviv_gpu { unsigned int freq_scale; unsigned long base_rate_core; unsigned long base_rate_shader; + + /* Loadavg: */ + struct timer_list loadavg_timer; + spinlock_t loadavg_spinlock; + struct sma_loadavg loadavg_value[ARRAY_SIZE(etna_idle_module_names)]; + unsigned int loadavg_percentage[ARRAY_SIZE(etna_idle_module_names)]; };
static inline void gpu_write(struct etnaviv_gpu *gpu, u32 reg, u32 data) @@ -160,6 +188,7 @@ static inline u32 gpu_read(struct etnaviv_gpu *gpu, u32 reg) int etnaviv_gpu_get_param(struct etnaviv_gpu *gpu, u32 param, u64 *value);
int etnaviv_gpu_init(struct etnaviv_gpu *gpu); +void etnaviv_gpu_shutdown(struct etnaviv_gpu *gpu); bool etnaviv_fill_identity_from_hwdb(struct etnaviv_gpu *gpu);
#ifdef CONFIG_DEBUG_FS
Hi Christian,
Am Freitag, den 10.07.2020, 09:41 +0200 schrieb Christian Gmeiner:
This isn't guaranteed to work on a clock/power-gated GPU. Also, we surely don't want to wake an idle system every 10ms just to sample a "no load" value, so this needs some integration with runtime PM, to disable the sampling when the GPU is powered down and enable it when powered up. The loadavg must be able to adapt to jumps in the sampling interval while idle.
A jiffies-based timer is much too coarse for a regular 10ms sampling. On a typical 100Hz system, 10ms is a single jiffy, so your timer will fire anywhere in the range of ~0ms...~20ms. This won't get us a usable measurement.
Regards, Lucas
Hoi Lucas,
Am Fr., 10. Juli 2020 um 10:19 Uhr schrieb Lucas Stach l.stach@pengutronix.de:
Oh yea.. runtime PM.. I thought I was missing something. Will tackle this in the next version.
Makes sense.. will switch to hrtimers.
Might be helpful to see the loadavg in debugfs.
Signed-off-by: Christian Gmeiner christian.gmeiner@gmail.com --- drivers/gpu/drm/etnaviv/etnaviv_gpu.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c index 1f0eb7e00657..82fe4aafed57 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c @@ -884,7 +884,7 @@ int etnaviv_gpu_debugfs(struct etnaviv_gpu *gpu, struct seq_file *m) { struct dma_debug debug; u32 dma_lo, dma_hi, axi, idle; - int ret; + int ret, i;
seq_printf(m, "%s Status:\n", dev_name(gpu->dev));
@@ -1002,6 +1002,16 @@ int etnaviv_gpu_debugfs(struct etnaviv_gpu *gpu, struct seq_file *m) if (idle & VIVS_HI_IDLE_STATE_AXI_LP) seq_puts(m, "\t AXI low power mode\n");
+ seq_printf(m, "\tload:\n"); + spin_lock_bh(&gpu->loadavg_spinlock); + + for (i = 0; i < ARRAY_SIZE(etna_idle_module_names); i++) + seq_printf(m, "\t %s: %u%%\n", + etna_idle_module_names[i].name, + gpu->loadavg_percentage[i]); + + spin_unlock_bh(&gpu->loadavg_spinlock); + if (gpu->identity.features & chipFeatures_DEBUG_MODE) { u32 read0 = gpu_read(gpu, VIVS_MC_DEBUG_READ0); u32 read1 = gpu_read(gpu, VIVS_MC_DEBUG_READ1);
Make it possible to access the sub-GPU component load value from user space with the perfmon infrastructure.
Signed-off-by: Christian Gmeiner christian.gmeiner@gmail.com --- drivers/gpu/drm/etnaviv/etnaviv_perfmon.c | 79 +++++++++++++++++++++++ 1 file changed, 79 insertions(+)
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_perfmon.c b/drivers/gpu/drm/etnaviv/etnaviv_perfmon.c index 75f9db8f7bec..614d86e2802d 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_perfmon.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_perfmon.c @@ -98,6 +98,19 @@ static u32 hi_total_idle_cycle_read(struct etnaviv_gpu *gpu, return gpu_read(gpu, reg); }
+static u32 load_read(struct etnaviv_gpu *gpu, + const struct etnaviv_pm_domain *domain, + const struct etnaviv_pm_signal *signal) +{ + u32 load; + + spin_lock_bh(&gpu->loadavg_spinlock); + load = gpu->loadavg_percentage[signal->data]; + spin_unlock_bh(&gpu->loadavg_spinlock); + + return load; +} + static const struct etnaviv_pm_domain doms_3d[] = { { .name = "HI", @@ -387,6 +400,72 @@ static const struct etnaviv_pm_domain doms_3d[] = { &perf_reg_read } } + }, + { + .name = "LOAD", + .nr_signals = 12, + .signal = (const struct etnaviv_pm_signal[]) { + { + "FE", + 0, + &load_read + }, + { + "DE", + 1, + &load_read + }, + { + "PE", + 2, + &load_read + }, + { + "SH", + 3, + &load_read + }, + { + "PA", + 4, + &load_read + }, + { + "SE", + 5, + &load_read + }, + { + "RA", + 6, + &load_read + }, + { + "TX", + 7, + &load_read + }, + { + "VG", + 8, + &load_read + }, + { + "IM", + 9, + &load_read + }, + { + "FP", + 10, + &load_read + }, + { + "TS", + 11, + &load_read + } + } } };
Hi Christian,
Am Freitag, den 10.07.2020, 09:41 +0200 schrieb Christian Gmeiner:
1 second is a pretty long window in GPU time. Why do you feel that a simple moving average is more appropriate than an exponentially weighted one here? Note that I haven't given this any thought myself and haven't made up my mind yet, so this is an honest question to understand the reasoning behind your choice.
Regards, Lucas
Hoi Lucas
Am Fr., 10. Juli 2020 um 10:31 Uhr schrieb Lucas Stach l.stach@pengutronix.de:
I played with both variants but I 'feel' that SMA might be a better fit. To be honest I have no background in signal processing and stuff like this so.. I will go the route you guide me to :) I have kept the "interface" for SMA equal to the one EWMA uses so I can easily switch between them.
Hi Lucas,
Am Fr., 10. Juli 2020 um 10:44 Uhr schrieb Christian Gmeiner christian.gmeiner@gmail.com:
I have v2 ready except for this point. If you want to go with EWMA could you provide me with a good weight reciprocal value to use?
dri-devel@lists.freedesktop.org