With 'sync points' we can sample the requested perfmon signals before and/or after the submitted command buffer.
Signed-off-by: Christian Gmeiner christian.gmeiner@gmail.com --- drivers/gpu/drm/etnaviv/etnaviv_gpu.c | 112 +++++++++++++++++++++++++++++----- drivers/gpu/drm/etnaviv/etnaviv_gpu.h | 4 ++ 2 files changed, 102 insertions(+), 14 deletions(-)
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c index 0766861..2e9f031 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c @@ -1313,12 +1313,47 @@ void etnaviv_gpu_pm_put(struct etnaviv_gpu *gpu) pm_runtime_put_autosuspend(gpu->dev); }
+static void sync_point_perfmon_sample(struct etnaviv_gpu *gpu, + struct etnaviv_event *event, unsigned int flags) +{ + unsigned int i; + + for (i = 0; i < event->nr_pmrs; i++) { + const struct etnaviv_perfmon_request *pmr = event->pmrs + i; + + if (pmr->flags == flags) + etnaviv_perfmon_process(gpu, pmr); + } +} + +static void sync_point_perfmon_sample_pre(struct etnaviv_gpu *gpu, + struct etnaviv_event *event) +{ + sync_point_perfmon_sample(gpu, event, ETNA_PM_PROCESS_PRE); +} + +static void sync_point_perfmon_sample_post(struct etnaviv_gpu *gpu, + struct etnaviv_event *event) +{ + unsigned int i; + + sync_point_perfmon_sample(gpu, event, ETNA_PM_PROCESS_POST); + + for (i = 0; i < event->nr_pmrs; i++) { + const struct etnaviv_perfmon_request *pmr = event->pmrs + i; + + *pmr->bo_vma = pmr->sequence; + } +} + + /* add bo's to gpu's ring, and kick gpu: */ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu, struct etnaviv_gem_submit *submit, struct etnaviv_cmdbuf *cmdbuf) { struct dma_fence *fence; unsigned int event, i; + unsigned int sync[2] = { ~0U, ~0U }; int ret;
ret = etnaviv_gpu_pm_get_sync(gpu); @@ -1341,6 +1376,39 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu, goto out_pm_put; }
+ /* + * if there are performance monitor requests we need to have a sync point to + * re-configure gpu and process ETNA_PM_PROCESS_PRE requests. + */ + if (cmdbuf->nr_pmrs) { + sync[0] = event_alloc(gpu); + + if (unlikely(sync[0] == ~0U)) { + DRM_ERROR("no free events for sync point 0\n"); + event_free(gpu, event); + ret = -EBUSY; + goto out_pm_put; + } + } + + /* + * if there are performance monitor requests we need to have sync point to + * re-configure gpu, process ETNA_PM_PROCESS_POST requests and update the + * sequence number for userspace. + */ + if (cmdbuf->nr_pmrs) { + sync[1] = event_alloc(gpu); + + if (unlikely(sync[1] == ~0U)) { + DRM_ERROR("no free events for sync point 1\n"); + event_free(gpu, event); + if (unlikely(sync[0] == ~0U)) + event_free(gpu, sync[0]); + ret = -EBUSY; + goto out_pm_put; + } + } + mutex_lock(&gpu->lock);
fence = etnaviv_gpu_fence_alloc(gpu); @@ -1360,8 +1428,22 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu, gpu->lastctx = cmdbuf->ctx; }
+ if (sync[0] != ~0U) { + gpu->event[sync[0]].sync_point = &sync_point_perfmon_sample_pre; + gpu->event[sync[0]].nr_pmrs = cmdbuf->nr_pmrs; + gpu->event[sync[0]].pmrs = cmdbuf->pmrs; + etnaviv_sync_point_queue(gpu, sync[0]); + } + etnaviv_buffer_queue(gpu, event, cmdbuf);
+ if (sync[1] != ~0U) { + gpu->event[sync[1]].sync_point = &sync_point_perfmon_sample_post; + gpu->event[sync[1]].nr_pmrs = cmdbuf->nr_pmrs; + gpu->event[sync[1]].pmrs = cmdbuf->pmrs; + etnaviv_sync_point_queue(gpu, sync[1]); + } + cmdbuf->fence = fence; list_add_tail(&cmdbuf->node, &gpu->active_cmd_list);
@@ -1455,20 +1537,22 @@ static irqreturn_t irq_handler(int irq, void *data) etnaviv_process_sync_point(gpu, &gpu->event[event]);
fence = gpu->event[event].fence; - gpu->event[event].fence = NULL; - dma_fence_signal(fence); - - /* - * Events can be processed out of order. Eg, - * - allocate and queue event 0 - * - allocate event 1 - * - event 0 completes, we process it - * - allocate and queue event 0 - * - event 1 and event 0 complete - * we can end up processing event 0 first, then 1. - */ - if (fence_after(fence->seqno, gpu->completed_fence)) - gpu->completed_fence = fence->seqno; + if (fence) { + gpu->event[event].fence = NULL; + dma_fence_signal(fence); + + /* + * Events can be processed out of order. Eg, + * - allocate and queue event 0 + * - allocate event 1 + * - event 0 completes, we process it + * - allocate and queue event 0 + * - event 1 and event 0 complete + * we can end up processing event 0 first, then 1. + */ + if (fence_after(fence->seqno, gpu->completed_fence)) + gpu->completed_fence = fence->seqno; + }
event_free(gpu, event); } diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h index fee6ed9..71375ab 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h @@ -92,6 +92,10 @@ struct etnaviv_event { struct dma_fence *fence;
void (*sync_point)(struct etnaviv_gpu *gpu, struct etnaviv_event *event); + + /* performance monitor requests */ + unsigned int nr_pmrs; + struct etnaviv_perfmon_request *pmrs; };
struct etnaviv_cmdbuf_suballoc;
Signed-off-by: Christian Gmeiner christian.gmeiner@gmail.com --- drivers/gpu/drm/etnaviv/etnaviv_perfmon.c | 55 +++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+)
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_perfmon.c b/drivers/gpu/drm/etnaviv/etnaviv_perfmon.c index a8518bd..9079ffc 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_perfmon.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_perfmon.c @@ -17,6 +17,7 @@
#include "etnaviv_gpu.h" #include "etnaviv_perfmon.h" +#include "state_hi.xml.h"
struct etnaviv_pm_domain;
@@ -31,11 +32,65 @@ struct etnaviv_pm_signal {
struct etnaviv_pm_domain { char name[64]; + + /* profile register */ + u32 profile_read; + u32 profile_config; + u8 nr_signals; const struct etnaviv_pm_signal *signal; };
+static u32 simple_reg_read(struct etnaviv_gpu *gpu, + const struct etnaviv_pm_domain *domain, + const struct etnaviv_pm_signal *signal) +{ + return gpu_read(gpu, signal->data); +} + +static u32 perf_reg_read(struct etnaviv_gpu *gpu, + const struct etnaviv_pm_domain *domain, + const struct etnaviv_pm_signal *signal) +{ + gpu_write(gpu, domain->profile_config, signal->data); + + return gpu_read(gpu, domain->profile_read); +} + static const struct etnaviv_pm_domain doms[] = { + { + .name = "HI", + .profile_read = VIVS_MC_PROFILE_HI_READ, + .profile_config = VIVS_MC_PROFILE_CONFIG2, + .nr_signals = 5, + .signal = (const struct etnaviv_pm_signal[]) { + { + "TOTAL_CYCLES", + VIVS_HI_PROFILE_TOTAL_CYCLES, + &simple_reg_read + }, + { + "IDLE_CYCLES", + VIVS_HI_PROFILE_IDLE_CYCLES, + &simple_reg_read + }, + { + "AXI_CYCLES_READ_REQUEST_STALLED", + VIVS_MC_PROFILE_CONFIG2_HI_AXI_CYCLES_READ_REQUEST_STALLED, + &perf_reg_read + }, + { + "AXI_CYCLES_WRITE_REQUEST_STALLED", + VIVS_MC_PROFILE_CONFIG2_HI_AXI_CYCLES_WRITE_REQUEST_STALLED, + &perf_reg_read + }, + { + "AXI_CYCLES_WRITE_DATA_STALLED", + VIVS_MC_PROFILE_CONFIG2_HI_AXI_CYCLES_WRITE_DATA_STALLED, + &perf_reg_read + } + } + } };
int etnaviv_pm_query_dom(struct etnaviv_gpu *gpu,
We need to iterate over all pixel pipelines to get the overall value.
Signed-off-by: Christian Gmeiner christian.gmeiner@gmail.com --- drivers/gpu/drm/etnaviv/etnaviv_perfmon.c | 52 +++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+)
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_perfmon.c b/drivers/gpu/drm/etnaviv/etnaviv_perfmon.c index 9079ffc..3bda13c 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_perfmon.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_perfmon.c @@ -57,6 +57,25 @@ static u32 perf_reg_read(struct etnaviv_gpu *gpu, return gpu_read(gpu, domain->profile_read); }
+static u32 pipe_reg_read(struct etnaviv_gpu *gpu, + const struct etnaviv_pm_domain *domain, + const struct etnaviv_pm_signal *signal) +{ + u32 clock = gpu_read(gpu, VIVS_HI_CLOCK_CONTROL); + u32 value = 0; + unsigned i; + + for (i = 0; i < gpu->identity.pixel_pipes; i++) { + clock &= ~(VIVS_HI_CLOCK_CONTROL_DEBUG_PIXEL_PIPE__MASK); + clock |= VIVS_HI_CLOCK_CONTROL_DEBUG_PIXEL_PIPE(i); + gpu_write(gpu, VIVS_HI_CLOCK_CONTROL, clock); + gpu_write(gpu, domain->profile_config, signal->data); + value += gpu_read(gpu, domain->profile_read); + } + + return value; +} + static const struct etnaviv_pm_domain doms[] = { { .name = "HI", @@ -90,6 +109,39 @@ static const struct etnaviv_pm_domain doms[] = { &perf_reg_read } } + }, + { + .name = "PE", + .profile_read = VIVS_MC_PROFILE_PE_READ, + .profile_config = VIVS_MC_PROFILE_CONFIG0, + .nr_signals = 5, + .signal = (const struct etnaviv_pm_signal[]) { + { + "PIXEL_COUNT_KILLED_BY_COLOR_PIPE", + VIVS_MC_PROFILE_CONFIG0_PE_PIXEL_COUNT_KILLED_BY_COLOR_PIPE, + &pipe_reg_read + }, + { + "PIXEL_COUNT_KILLED_BY_DEPTH_PIPE", + VIVS_MC_PROFILE_CONFIG0_PE_PIXEL_COUNT_KILLED_BY_DEPTH_PIPE, + &pipe_reg_read + }, + { + "PIXEL_COUNT_DRAWN_BY_COLOR_PIPE", + VIVS_MC_PROFILE_CONFIG0_PE_PIXEL_COUNT_DRAWN_BY_COLOR_PIPE, + &pipe_reg_read + }, + { + "PIXEL_COUNT_DRAWN_BY_DEPTH_PIPE", + VIVS_MC_PROFILE_CONFIG0_PE_PIXEL_COUNT_DRAWN_BY_DEPTH_PIPE, + &pipe_reg_read + }, + { + "PIXELS_RENDERED_2D", + VIVS_MC_PROFILE_CONFIG0_PE_PIXELS_RENDERED_2D, + &pipe_reg_read + } + } } };
Signed-off-by: Christian Gmeiner christian.gmeiner@gmail.com --- drivers/gpu/drm/etnaviv/etnaviv_perfmon.c | 53 +++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+)
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_perfmon.c b/drivers/gpu/drm/etnaviv/etnaviv_perfmon.c index 3bda13c..315c49d 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_perfmon.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_perfmon.c @@ -142,6 +142,59 @@ static const struct etnaviv_pm_domain doms[] = { &pipe_reg_read } } + }, + { + .name = "SH", + .profile_read = VIVS_MC_PROFILE_SH_READ, + .profile_config = VIVS_MC_PROFILE_CONFIG0, + .nr_signals = 9, + .signal = (const struct etnaviv_pm_signal[]) { + { + "SHADER_CYCLES", + VIVS_MC_PROFILE_CONFIG0_SH_SHADER_CYCLES, + &perf_reg_read + }, + { + "PS_INST_COUNTER", + VIVS_MC_PROFILE_CONFIG0_SH_PS_INST_COUNTER, + &perf_reg_read + }, + { + "RENDERED_PIXEL_COUNTER", + VIVS_MC_PROFILE_CONFIG0_SH_RENDERED_PIXEL_COUNTER, + &perf_reg_read + }, + { + "VS_INST_COUNTER", + VIVS_MC_PROFILE_CONFIG0_SH_VS_INST_COUNTER, + &pipe_reg_read + }, + { + "RENDERED_VERTICE_COUNTER", + VIVS_MC_PROFILE_CONFIG0_SH_RENDERED_VERTICE_COUNTER, + &pipe_reg_read + }, + { + "VTX_BRANCH_INST_COUNTER", + VIVS_MC_PROFILE_CONFIG0_SH_VTX_BRANCH_INST_COUNTER, + &pipe_reg_read + }, + { + "VTX_TEXLD_INST_COUNTER", + VIVS_MC_PROFILE_CONFIG0_SH_VTX_TEXLD_INST_COUNTER, + &pipe_reg_read + }, + { + "PXL_BRANCH_INST_COUNTER", + VIVS_MC_PROFILE_CONFIG0_SH_PXL_BRANCH_INST_COUNTER, + &pipe_reg_read + }, + { + "PXL_TEXLD_INST_COUNTERUNTER", + VIVS_MC_PROFILE_CONFIG0_SH_PXL_TEXLD_INST_COUNTER, + &pipe_reg_read + } + } } };
Signed-off-by: Christian Gmeiner christian.gmeiner@gmail.com --- drivers/gpu/drm/etnaviv/etnaviv_perfmon.c | 38 +++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+)
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_perfmon.c b/drivers/gpu/drm/etnaviv/etnaviv_perfmon.c index 315c49d..e0b7cae 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_perfmon.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_perfmon.c @@ -195,6 +195,44 @@ static const struct etnaviv_pm_domain doms[] = { &pipe_reg_read } } + }, + { + .name = "PA", + .profile_read = VIVS_MC_PROFILE_PA_READ, + .profile_config = VIVS_MC_PROFILE_CONFIG1, + .nr_signals = 6, + .signal = (const struct etnaviv_pm_signal[]) { + { + "INPUT_VTX_COUNTER", + VIVS_MC_PROFILE_CONFIG1_PA_INPUT_VTX_COUNTER, + &perf_reg_read + }, + { + "INPUT_PRIM_COUNTER", + VIVS_MC_PROFILE_CONFIG1_PA_INPUT_PRIM_COUNTER, + &perf_reg_read + }, + { + "OUTPUT_PRIM_COUNTER", + VIVS_MC_PROFILE_CONFIG1_PA_OUTPUT_PRIM_COUNTER, + &perf_reg_read + }, + { + "DEPTH_CLIPPED_COUNTER", + VIVS_MC_PROFILE_CONFIG1_PA_DEPTH_CLIPPED_COUNTER, + &pipe_reg_read + }, + { + "TRIVIAL_REJECTED_COUNTER", + VIVS_MC_PROFILE_CONFIG1_PA_TRIVIAL_REJECTED_COUNTER, + &pipe_reg_read + }, + { + "CULLED_COUNTER", + VIVS_MC_PROFILE_CONFIG1_PA_CULLED_COUNTER, + &pipe_reg_read + } + } } };
Signed-off-by: Christian Gmeiner christian.gmeiner@gmail.com --- drivers/gpu/drm/etnaviv/etnaviv_perfmon.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+)
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_perfmon.c b/drivers/gpu/drm/etnaviv/etnaviv_perfmon.c index e0b7cae..4e42c90 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_perfmon.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_perfmon.c @@ -233,6 +233,24 @@ static const struct etnaviv_pm_domain doms[] = { &pipe_reg_read } } + }, + { + .name = "SE", + .profile_read = VIVS_MC_PROFILE_SE_READ, + .profile_config = VIVS_MC_PROFILE_CONFIG1, + .nr_signals = 2, + .signal = (const struct etnaviv_pm_signal[]) { + { + "CULLED_TRIANGLE_COUNT", + VIVS_MC_PROFILE_CONFIG1_SE_CULLED_TRIANGLE_COUNT, + &perf_reg_read + }, + { + "CULLED_LINES_COUNT", + VIVS_MC_PROFILE_CONFIG1_SE_CULLED_LINES_COUNT, + &perf_reg_read + } + } } };
Signed-off-by: Christian Gmeiner christian.gmeiner@gmail.com --- drivers/gpu/drm/etnaviv/etnaviv_perfmon.c | 43 +++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+)
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_perfmon.c b/drivers/gpu/drm/etnaviv/etnaviv_perfmon.c index 4e42c90..9243f73 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_perfmon.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_perfmon.c @@ -251,6 +251,49 @@ static const struct etnaviv_pm_domain doms[] = { &perf_reg_read } } + }, + { + .name = "RA", + .profile_read = VIVS_MC_PROFILE_RA_READ, + .profile_config = VIVS_MC_PROFILE_CONFIG1, + .nr_signals = 7, + .signal = (const struct etnaviv_pm_signal[]) { + { + "VALID_PIXEL_COUNT", + VIVS_MC_PROFILE_CONFIG1_RA_VALID_PIXEL_COUNT, + &perf_reg_read + }, + { + "TOTAL_QUAD_COUNT", + VIVS_MC_PROFILE_CONFIG1_RA_TOTAL_QUAD_COUNT, + &perf_reg_read + }, + { + "VALID_QUAD_COUNT_AFTER_EARLY_Z", + VIVS_MC_PROFILE_CONFIG1_RA_VALID_QUAD_COUNT_AFTER_EARLY_Z, + &perf_reg_read + }, + { + "TOTAL_PRIMITIVE_COUNT", + VIVS_MC_PROFILE_CONFIG1_RA_TOTAL_PRIMITIVE_COUNT, + &perf_reg_read + }, + { + "PIPE_CACHE_MISS_COUNTER", + VIVS_MC_PROFILE_CONFIG1_RA_PIPE_CACHE_MISS_COUNTER, + &perf_reg_read + }, + { + "PREFETCH_CACHE_MISS_COUNTER", + VIVS_MC_PROFILE_CONFIG1_RA_PREFETCH_CACHE_MISS_COUNTER, + &perf_reg_read + }, + { + "CULLED_QUAD_COUNT", + VIVS_MC_PROFILE_CONFIG1_RA_CULLED_QUAD_COUNT, + &perf_reg_read + } + } } };
Signed-off-by: Christian Gmeiner christian.gmeiner@gmail.com --- drivers/gpu/drm/etnaviv/etnaviv_perfmon.c | 53 +++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+)
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_perfmon.c b/drivers/gpu/drm/etnaviv/etnaviv_perfmon.c index 9243f73..c5bcbb3 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_perfmon.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_perfmon.c @@ -294,6 +294,59 @@ static const struct etnaviv_pm_domain doms[] = { &perf_reg_read } } + }, + { + .name = "TX", + .profile_read = VIVS_MC_PROFILE_TX_READ, + .profile_config = VIVS_MC_PROFILE_CONFIG1, + .nr_signals = 9, + .signal = (const struct etnaviv_pm_signal[]) { + { + "TOTAL_BILINEAR_REQUESTS", + VIVS_MC_PROFILE_CONFIG1_TX_TOTAL_BILINEAR_REQUESTS, + &perf_reg_read + }, + { + "TOTAL_TRILINEAR_REQUESTS", + VIVS_MC_PROFILE_CONFIG1_TX_TOTAL_TRILINEAR_REQUESTS, + &perf_reg_read + }, + { + "TOTAL_DISCARDED_TEXTURE_REQUESTS", + VIVS_MC_PROFILE_CONFIG1_TX_TOTAL_DISCARDED_TEXTURE_REQUESTS, + &perf_reg_read + }, + { + "TOTAL_TEXTURE_REQUESTS", + VIVS_MC_PROFILE_CONFIG1_TX_TOTAL_TEXTURE_REQUESTS, + &perf_reg_read + }, + { + "MEM_READ_COUNT", + VIVS_MC_PROFILE_CONFIG1_TX_MEM_READ_COUNT, + &perf_reg_read + }, + { + "MEM_READ_IN_8B_COUNT", + VIVS_MC_PROFILE_CONFIG1_TX_MEM_READ_IN_8B_COUNT, + &perf_reg_read + }, + { + "CACHE_MISS_COUNT", + VIVS_MC_PROFILE_CONFIG1_TX_CACHE_MISS_COUNT, + &perf_reg_read + }, + { + "CACHE_HIT_TEXEL_COUNT", + VIVS_MC_PROFILE_CONFIG1_TX_CACHE_HIT_TEXEL_COUNT, + &perf_reg_read + }, + { + "CACHE_MISS_TEXEL_COUNT", + VIVS_MC_PROFILE_CONFIG1_TX_CACHE_MISS_TEXEL_COUNT, + &perf_reg_read + } + } } };
Signed-off-by: Christian Gmeiner christian.gmeiner@gmail.com --- drivers/gpu/drm/etnaviv/etnaviv_perfmon.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+)
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_perfmon.c b/drivers/gpu/drm/etnaviv/etnaviv_perfmon.c index c5bcbb3..63acda0 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_perfmon.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_perfmon.c @@ -347,6 +347,29 @@ static const struct etnaviv_pm_domain doms[] = { &perf_reg_read } } + }, + { + .name = "MC", + .profile_read = VIVS_MC_PROFILE_MC_READ, + .profile_config = VIVS_MC_PROFILE_CONFIG2, + .nr_signals = 3, + .signal = (const struct etnaviv_pm_signal[]) { + { + "TOTAL_READ_REQ_8B_FROM_PIPELINE", + VIVS_MC_PROFILE_CONFIG2_MC_TOTAL_READ_REQ_8B_FROM_PIPELINE, + &perf_reg_read + }, + { + "TOTAL_READ_REQ_8B_FROM_IP", + VIVS_MC_PROFILE_CONFIG2_MC_TOTAL_READ_REQ_8B_FROM_IP, + &perf_reg_read + }, + { + "TOTAL_WRITE_REQ_8B_FROM_PIPELINE", + VIVS_MC_PROFILE_CONFIG2_MC_TOTAL_WRITE_REQ_8B_FROM_PIPELINE, + &perf_reg_read + } + } } };
As done by the Vivante kernel driver.
Signed-off-by: Christian Gmeiner christian.gmeiner@gmail.com --- drivers/gpu/drm/etnaviv/etnaviv_gpu.c | 13 +++++++++++++ 1 file changed, 13 insertions(+)
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c index 2e9f031..1e23472 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c @@ -1329,6 +1329,13 @@ static void sync_point_perfmon_sample(struct etnaviv_gpu *gpu, static void sync_point_perfmon_sample_pre(struct etnaviv_gpu *gpu, struct etnaviv_event *event) { + u32 val; + + /* disable clock gating */ + val = gpu_read(gpu, VIVS_PM_POWER_CONTROLS); + val &= ~VIVS_PM_POWER_CONTROLS_ENABLE_MODULE_CLOCK_GATING; + gpu_write(gpu, VIVS_PM_POWER_CONTROLS, val); + sync_point_perfmon_sample(gpu, event, ETNA_PM_PROCESS_PRE); }
@@ -1336,6 +1343,7 @@ static void sync_point_perfmon_sample_post(struct etnaviv_gpu *gpu, struct etnaviv_event *event) { unsigned int i; + u32 val;
sync_point_perfmon_sample(gpu, event, ETNA_PM_PROCESS_POST);
@@ -1344,6 +1352,11 @@ static void sync_point_perfmon_sample_post(struct etnaviv_gpu *gpu,
*pmr->bo_vma = pmr->sequence; } + + /* enable clock gating */ + val = gpu_read(gpu, VIVS_PM_POWER_CONTROLS); + val |= VIVS_PM_POWER_CONTROLS_ENABLE_MODULE_CLOCK_GATING; + gpu_write(gpu, VIVS_PM_POWER_CONTROLS, val); }
Am Freitag, den 09.06.2017, 12:26 +0200 schrieb Christian Gmeiner:
As done by the Vivante kernel driver.
Signed-off-by: Christian Gmeiner christian.gmeiner@gmail.com
Reviewed-by: Lucas Stach l.stach@pengutronix.de
drivers/gpu/drm/etnaviv/etnaviv_gpu.c | 13 +++++++++++++ 1 file changed, 13 insertions(+)
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c index 2e9f031..1e23472 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c @@ -1329,6 +1329,13 @@ static void sync_point_perfmon_sample(struct etnaviv_gpu *gpu, static void sync_point_perfmon_sample_pre(struct etnaviv_gpu *gpu, struct etnaviv_event *event) {
- u32 val;
- /* disable clock gating */
- val = gpu_read(gpu, VIVS_PM_POWER_CONTROLS);
- val &= ~VIVS_PM_POWER_CONTROLS_ENABLE_MODULE_CLOCK_GATING;
- gpu_write(gpu, VIVS_PM_POWER_CONTROLS, val);
sync_point_perfmon_sample(gpu, event, ETNA_PM_PROCESS_PRE); } @@ -1336,6 +1343,7 @@ static void sync_point_perfmon_sample_post(struct etnaviv_gpu *gpu, struct etnaviv_event *event) { unsigned int i;
- u32 val;
sync_point_perfmon_sample(gpu, event, ETNA_PM_PROCESS_POST); @@ -1344,6 +1352,11 @@ static void sync_point_perfmon_sample_post(struct etnaviv_gpu *gpu, *pmr->bo_vma = pmr->sequence; }
- /* enable clock gating */
- val = gpu_read(gpu, VIVS_PM_POWER_CONTROLS);
- val |= VIVS_PM_POWER_CONTROLS_ENABLE_MODULE_CLOCK_GATING;
- gpu_write(gpu, VIVS_PM_POWER_CONTROLS, val);
}
Signed-off-by: Christian Gmeiner christian.gmeiner@gmail.com --- drivers/gpu/drm/etnaviv/etnaviv_gpu.c | 10 ++++++++++ 1 file changed, 10 insertions(+)
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c index 1e23472..faf2925 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c @@ -1336,6 +1336,11 @@ static void sync_point_perfmon_sample_pre(struct etnaviv_gpu *gpu, val &= ~VIVS_PM_POWER_CONTROLS_ENABLE_MODULE_CLOCK_GATING; gpu_write(gpu, VIVS_PM_POWER_CONTROLS, val);
+ /* enable debug register */ + val = gpu_read(gpu, VIVS_HI_CLOCK_CONTROL); + val &= ~VIVS_HI_CLOCK_CONTROL_DISABLE_DEBUG_REGISTERS; + gpu_write(gpu, VIVS_HI_CLOCK_CONTROL, val); + sync_point_perfmon_sample(gpu, event, ETNA_PM_PROCESS_PRE); }
@@ -1353,6 +1358,11 @@ static void sync_point_perfmon_sample_post(struct etnaviv_gpu *gpu, *pmr->bo_vma = pmr->sequence; }
+ /* disable debug register */ + val = gpu_read(gpu, VIVS_HI_CLOCK_CONTROL); + val |= VIVS_HI_CLOCK_CONTROL_DISABLE_DEBUG_REGISTERS; + gpu_write(gpu, VIVS_HI_CLOCK_CONTROL, val); + /* enable clock gating */ val = gpu_read(gpu, VIVS_PM_POWER_CONTROLS); val |= VIVS_PM_POWER_CONTROLS_ENABLE_MODULE_CLOCK_GATING;
Please add a short description, stating that the perfmon registers are debug regs.
Am Freitag, den 09.06.2017, 12:26 +0200 schrieb Christian Gmeiner:
Signed-off-by: Christian Gmeiner christian.gmeiner@gmail.com
Reviewed-by: Lucas Stach l.stach@pengutronix.de
drivers/gpu/drm/etnaviv/etnaviv_gpu.c | 10 ++++++++++ 1 file changed, 10 insertions(+)
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c index 1e23472..faf2925 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c @@ -1336,6 +1336,11 @@ static void sync_point_perfmon_sample_pre(struct etnaviv_gpu *gpu, val &= ~VIVS_PM_POWER_CONTROLS_ENABLE_MODULE_CLOCK_GATING; gpu_write(gpu, VIVS_PM_POWER_CONTROLS, val);
- /* enable debug register */
- val = gpu_read(gpu, VIVS_HI_CLOCK_CONTROL);
- val &= ~VIVS_HI_CLOCK_CONTROL_DISABLE_DEBUG_REGISTERS;
- gpu_write(gpu, VIVS_HI_CLOCK_CONTROL, val);
sync_point_perfmon_sample(gpu, event, ETNA_PM_PROCESS_PRE); } @@ -1353,6 +1358,11 @@ static void sync_point_perfmon_sample_post(struct etnaviv_gpu *gpu, *pmr->bo_vma = pmr->sequence; }
- /* disable debug register */
- val = gpu_read(gpu, VIVS_HI_CLOCK_CONTROL);
- val |= VIVS_HI_CLOCK_CONTROL_DISABLE_DEBUG_REGISTERS;
- gpu_write(gpu, VIVS_HI_CLOCK_CONTROL, val);
/* enable clock gating */ val = gpu_read(gpu, VIVS_PM_POWER_CONTROLS); val |= VIVS_PM_POWER_CONTROLS_ENABLE_MODULE_CLOCK_GATING;
We increment the minor driver version so userspace can detect perfmon support.
Signed-off-by: Christian Gmeiner christian.gmeiner@gmail.com --- drivers/gpu/drm/etnaviv/etnaviv_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.c b/drivers/gpu/drm/etnaviv/etnaviv_drv.c index 536760a..eec6c00 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_drv.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.c @@ -550,7 +550,7 @@ static struct drm_driver etnaviv_drm_driver = { .desc = "etnaviv DRM", .date = "20151214", .major = 1, - .minor = 1, + .minor = 2, };
/*
Am Freitag, den 09.06.2017, 12:26 +0200 schrieb Christian Gmeiner:
We increment the minor driver version so userspace can detect perfmon support.
Signed-off-by: Christian Gmeiner christian.gmeiner@gmail.com
Reviewed-by: Lucas Stach l.stach@pengutronix.de
drivers/gpu/drm/etnaviv/etnaviv_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.c b/drivers/gpu/drm/etnaviv/etnaviv_drv.c index 536760a..eec6c00 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_drv.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.c @@ -550,7 +550,7 @@ static struct drm_driver etnaviv_drm_driver = { .desc = "etnaviv DRM", .date = "20151214", .major = 1,
- .minor = 1,
- .minor = 2,
}; /*
Am Freitag, den 09.06.2017, 12:26 +0200 schrieb Christian Gmeiner:
With 'sync points' we can sample the requested perfmon signals before and/or after the submitted command buffer.
Signed-off-by: Christian Gmeiner christian.gmeiner@gmail.com
drivers/gpu/drm/etnaviv/etnaviv_gpu.c | 112 +++++++++++++++++++++++++++++----- drivers/gpu/drm/etnaviv/etnaviv_gpu.h | 4 ++ 2 files changed, 102 insertions(+), 14 deletions(-)
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c index 0766861..2e9f031 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c @@ -1313,12 +1313,47 @@ void etnaviv_gpu_pm_put(struct etnaviv_gpu *gpu) pm_runtime_put_autosuspend(gpu->dev); } +static void sync_point_perfmon_sample(struct etnaviv_gpu *gpu,
- struct etnaviv_event *event, unsigned int flags)
+{
- unsigned int i;
- for (i = 0; i < event->nr_pmrs; i++) {
const struct etnaviv_perfmon_request *pmr = event-
pmrs + i;
if (pmr->flags == flags)
etnaviv_perfmon_process(gpu, pmr);
- }
+}
+static void sync_point_perfmon_sample_pre(struct etnaviv_gpu *gpu,
- struct etnaviv_event *event)
+{
- sync_point_perfmon_sample(gpu, event, ETNA_PM_PROCESS_PRE);
+}
+static void sync_point_perfmon_sample_post(struct etnaviv_gpu *gpu,
- struct etnaviv_event *event)
+{
- unsigned int i;
- sync_point_perfmon_sample(gpu, event, ETNA_PM_PROCESS_POST);
- for (i = 0; i < event->nr_pmrs; i++) {
const struct etnaviv_perfmon_request *pmr = event-
pmrs + i;
*pmr->bo_vma = pmr->sequence;
- }
+}
/* add bo's to gpu's ring, and kick gpu: */ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu, struct etnaviv_gem_submit *submit, struct etnaviv_cmdbuf *cmdbuf) { struct dma_fence *fence; unsigned int event, i;
- unsigned int sync[2] = { ~0U, ~0U };
int ret; ret = etnaviv_gpu_pm_get_sync(gpu); @@ -1341,6 +1376,39 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu, goto out_pm_put; }
- /*
- * if there are performance monitor requests we need to have
a sync point to
- * re-configure gpu and process ETNA_PM_PROCESS_PRE
requests.
- */
- if (cmdbuf->nr_pmrs) {
sync[0] = event_alloc(gpu);
if (unlikely(sync[0] == ~0U)) {
DRM_ERROR("no free events for sync point
0\n");
event_free(gpu, event);
ret = -EBUSY;
goto out_pm_put;
}
- }
- /*
- * if there are performance monitor requests we need to have
sync point to
- * re-configure gpu, process ETNA_PM_PROCESS_POST requests
and update the
- * sequence number for userspace.
- */
- if (cmdbuf->nr_pmrs) {
sync[1] = event_alloc(gpu);
if (unlikely(sync[1] == ~0U)) {
DRM_ERROR("no free events for sync point
1\n");
event_free(gpu, event);
if (unlikely(sync[0] == ~0U))
event_free(gpu, sync[0]);
ret = -EBUSY;
goto out_pm_put;
}
- }
This is dangerous. We aren't holding the GPU lock at this point (and we can't because of livelocks with the GPU hangchecker), so given enough parallel submits with PMRs all the submits might abort as they can't allocate enough events, as each one might hold one out of the available events.
I think what we need here is to extend the event_alloc API to take the number of events we need and grab them all at once under the event spinlock.
mutex_lock(&gpu->lock); fence = etnaviv_gpu_fence_alloc(gpu); @@ -1360,8 +1428,22 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu, gpu->lastctx = cmdbuf->ctx; }
- if (sync[0] != ~0U) {
gpu->event[sync[0]].sync_point =
&sync_point_perfmon_sample_pre;
gpu->event[sync[0]].nr_pmrs = cmdbuf->nr_pmrs;
gpu->event[sync[0]].pmrs = cmdbuf->pmrs;
etnaviv_sync_point_queue(gpu, sync[0]);
- }
etnaviv_buffer_queue(gpu, event, cmdbuf);
- if (sync[1] != ~0U) {
gpu->event[sync[1]].sync_point =
&sync_point_perfmon_sample_post;
gpu->event[sync[1]].nr_pmrs = cmdbuf->nr_pmrs;
gpu->event[sync[1]].pmrs = cmdbuf->pmrs;
etnaviv_sync_point_queue(gpu, sync[1]);
- }
cmdbuf->fence = fence; list_add_tail(&cmdbuf->node, &gpu->active_cmd_list); @@ -1455,20 +1537,22 @@ static irqreturn_t irq_handler(int irq, void *data) etnaviv_process_sync_point(gpu, &gpu->event[event]); fence = gpu->event[event].fence;
gpu->event[event].fence = NULL;
dma_fence_signal(fence);
/*
* Events can be processed out of
order. Eg,
* - allocate and queue event 0
* - allocate event 1
* - event 0 completes, we process it
* - allocate and queue event 0
* - event 1 and event 0 complete
* we can end up processing event 0 first,
then 1.
*/
if (fence_after(fence->seqno, gpu-
completed_fence))
gpu->completed_fence = fence->seqno;
if (fence) {
gpu->event[event].fence = NULL;
dma_fence_signal(fence);
/*
* Events can be processed out of
order. Eg,
* - allocate and queue event 0
* - allocate event 1
* - event 0 completes, we process
it
* - allocate and queue event 0
* - event 1 and event 0 complete
* we can end up processing event 0
first, then 1.
*/
if (fence_after(fence->seqno, gpu-
completed_fence))
gpu->completed_fence =
fence->seqno;
}
event_free(gpu, event); } diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h index fee6ed9..71375ab 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h @@ -92,6 +92,10 @@ struct etnaviv_event { struct dma_fence *fence; void (*sync_point)(struct etnaviv_gpu *gpu, struct etnaviv_event *event);
- /* performance monitor requests */
- unsigned int nr_pmrs;
- struct etnaviv_perfmon_request *pmrs;
This should be a pointer to the cmdbuf itself, so we don't copy the information to various places.
}; struct etnaviv_cmdbuf_suballoc;
2017-06-26 15:41 GMT+02:00 Lucas Stach l.stach@pengutronix.de:
Am Freitag, den 09.06.2017, 12:26 +0200 schrieb Christian Gmeiner:
With 'sync points' we can sample the requested perfmon signals before and/or after the submitted command buffer.
Signed-off-by: Christian Gmeiner christian.gmeiner@gmail.com
drivers/gpu/drm/etnaviv/etnaviv_gpu.c | 112 +++++++++++++++++++++++++++++----- drivers/gpu/drm/etnaviv/etnaviv_gpu.h | 4 ++ 2 files changed, 102 insertions(+), 14 deletions(-)
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c index 0766861..2e9f031 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c @@ -1313,12 +1313,47 @@ void etnaviv_gpu_pm_put(struct etnaviv_gpu *gpu) pm_runtime_put_autosuspend(gpu->dev); }
+static void sync_point_perfmon_sample(struct etnaviv_gpu *gpu,
struct etnaviv_event *event, unsigned int flags)
+{
unsigned int i;
for (i = 0; i < event->nr_pmrs; i++) {
const struct etnaviv_perfmon_request *pmr = event-
pmrs + i;
if (pmr->flags == flags)
etnaviv_perfmon_process(gpu, pmr);
}
+}
+static void sync_point_perfmon_sample_pre(struct etnaviv_gpu *gpu,
struct etnaviv_event *event)
+{
sync_point_perfmon_sample(gpu, event, ETNA_PM_PROCESS_PRE);
+}
+static void sync_point_perfmon_sample_post(struct etnaviv_gpu *gpu,
struct etnaviv_event *event)
+{
unsigned int i;
sync_point_perfmon_sample(gpu, event, ETNA_PM_PROCESS_POST);
for (i = 0; i < event->nr_pmrs; i++) {
const struct etnaviv_perfmon_request *pmr = event-
pmrs + i;
*pmr->bo_vma = pmr->sequence;
}
+}
/* add bo's to gpu's ring, and kick gpu: */ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu, struct etnaviv_gem_submit *submit, struct etnaviv_cmdbuf *cmdbuf) { struct dma_fence *fence; unsigned int event, i;
unsigned int sync[2] = { ~0U, ~0U }; int ret; ret = etnaviv_gpu_pm_get_sync(gpu);
@@ -1341,6 +1376,39 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu, goto out_pm_put; }
/*
* if there are performance monitor requests we need to have
a sync point to
* re-configure gpu and process ETNA_PM_PROCESS_PRE
requests.
*/
if (cmdbuf->nr_pmrs) {
sync[0] = event_alloc(gpu);
if (unlikely(sync[0] == ~0U)) {
DRM_ERROR("no free events for sync point
0\n");
event_free(gpu, event);
ret = -EBUSY;
goto out_pm_put;
}
}
/*
* if there are performance monitor requests we need to have
sync point to
* re-configure gpu, process ETNA_PM_PROCESS_POST requests
and update the
* sequence number for userspace.
*/
if (cmdbuf->nr_pmrs) {
sync[1] = event_alloc(gpu);
if (unlikely(sync[1] == ~0U)) {
DRM_ERROR("no free events for sync point
1\n");
event_free(gpu, event);
if (unlikely(sync[0] == ~0U))
event_free(gpu, sync[0]);
ret = -EBUSY;
goto out_pm_put;
}
}
This is dangerous. We aren't holding the GPU lock at this point (and we can't because of livelocks with the GPU hangchecker), so given enough parallel submits with PMRs all the submits might abort as they can't allocate enough events, as each one might hold one out of the available events.
I think what we need here is to extend the event_alloc API to take the number of events we need and grab them all at once under the event spinlock.
That is a good idea - will change the event_alloc API in a separate patch.
mutex_lock(&gpu->lock); fence = etnaviv_gpu_fence_alloc(gpu);
@@ -1360,8 +1428,22 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu, gpu->lastctx = cmdbuf->ctx; }
if (sync[0] != ~0U) {
gpu->event[sync[0]].sync_point =
&sync_point_perfmon_sample_pre;
gpu->event[sync[0]].nr_pmrs = cmdbuf->nr_pmrs;
gpu->event[sync[0]].pmrs = cmdbuf->pmrs;
etnaviv_sync_point_queue(gpu, sync[0]);
}
etnaviv_buffer_queue(gpu, event, cmdbuf);
if (sync[1] != ~0U) {
gpu->event[sync[1]].sync_point =
&sync_point_perfmon_sample_post;
gpu->event[sync[1]].nr_pmrs = cmdbuf->nr_pmrs;
gpu->event[sync[1]].pmrs = cmdbuf->pmrs;
etnaviv_sync_point_queue(gpu, sync[1]);
}
cmdbuf->fence = fence; list_add_tail(&cmdbuf->node, &gpu->active_cmd_list);
@@ -1455,20 +1537,22 @@ static irqreturn_t irq_handler(int irq, void *data) etnaviv_process_sync_point(gpu, &gpu->event[event]);
fence = gpu->event[event].fence;
gpu->event[event].fence = NULL;
dma_fence_signal(fence);
/*
* Events can be processed out of
order. Eg,
* - allocate and queue event 0
* - allocate event 1
* - event 0 completes, we process it
* - allocate and queue event 0
* - event 1 and event 0 complete
* we can end up processing event 0 first,
then 1.
*/
if (fence_after(fence->seqno, gpu-
completed_fence))
gpu->completed_fence = fence->seqno;
if (fence) {
gpu->event[event].fence = NULL;
dma_fence_signal(fence);
/*
* Events can be processed out of
order. Eg,
* - allocate and queue event 0
* - allocate event 1
* - event 0 completes, we process
it
* - allocate and queue event 0
* - event 1 and event 0 complete
* we can end up processing event 0
first, then 1.
*/
if (fence_after(fence->seqno, gpu-
completed_fence))
gpu->completed_fence =
fence->seqno;
} event_free(gpu, event); }
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h index fee6ed9..71375ab 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h @@ -92,6 +92,10 @@ struct etnaviv_event { struct dma_fence *fence;
void (*sync_point)(struct etnaviv_gpu *gpu, struct
etnaviv_event *event);
/* performance monitor requests */
unsigned int nr_pmrs;
struct etnaviv_perfmon_request *pmrs;
This should be a pointer to the cmdbuf itself, so we don't copy the information to various places.
Makes sense - will be changed in v2.
};
struct etnaviv_cmdbuf_suballoc;
greets -- Christian Gmeiner, MSc
https://www.youtube.com/user/AloryOFFICIAL https://soundcloud.com/christian-gmeiner
dri-devel@lists.freedesktop.org