During debug sessions I encountered a need to trace back a job dependecy a few steps back to the first failing job. This trace helpped me a lot.
Signed-off-by: Andrey Grodzovsky andrey.grodzovsky@amd.com --- drivers/gpu/drm/scheduler/gpu_scheduler.c | 8 ++++++-- drivers/gpu/drm/scheduler/gpu_scheduler_trace.h | 24 ++++++++++++++++++++++++ 2 files changed, 30 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c b/drivers/gpu/drm/scheduler/gpu_scheduler.c index 3f2fc5e..45703b9 100644 --- a/drivers/gpu/drm/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c @@ -506,9 +506,13 @@ drm_sched_entity_pop_job(struct drm_sched_entity *entity) if (!sched_job) return NULL;
- while ((entity->dependency = sched->ops->dependency(sched_job, entity))) - if (drm_sched_entity_add_dependency_cb(entity)) + while ((entity->dependency = sched->ops->dependency(sched_job, entity))) { + if (drm_sched_entity_add_dependency_cb(entity)) { + + trace_drm_sched_job_wait_dep(sched_job, entity->dependency); return NULL; + } + }
/* skip jobs from entity that marked guilty */ if (entity->guilty && atomic_read(entity->guilty)) diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h b/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h index 4998ad9..1626f39 100644 --- a/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h +++ b/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h @@ -74,6 +74,30 @@ TRACE_EVENT(drm_sched_process_job, TP_printk("fence=%p signaled", __entry->fence) );
+TRACE_EVENT(drm_sched_job_wait_dep, + TP_PROTO(struct drm_sched_job *sched_job, struct dma_fence *fence), + TP_ARGS(sched_job, fence), + TP_STRUCT__entry( + __field(const char *,name) + __field(uint64_t, id) + __field(struct dma_fence *, fence) + __field(uint64_t, ctx) + __field(unsigned, seqno) + ), + + TP_fast_assign( + __entry->name = sched_job->sched->name; + __entry->id = sched_job->id; + __entry->fence = fence; + __entry->ctx = fence->context; + __entry->seqno = fence->seqno; + ), + TP_printk("job ring=%s, id=%llu, depends fence=%p, context=%llu, seq=%u", + __entry->name, __entry->id, + __entry->fence, __entry->ctx, + __entry->seqno) +); + #endif
/* This part must be outside protection */
It's useful to trace any dependency a job has on prevoius jobs.
Signed-off-by: Andrey Grodzovsky andrey.grodzovsky@amd.com --- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 5 +++++ drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 24 ++++++++++++++++++++++++ 2 files changed, 29 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 5518e62..5c22cfd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -32,6 +32,7 @@ #include <drm/amdgpu_drm.h> #include "amdgpu.h" #include "atom.h" +#include "amdgpu_trace.h"
#define AMDGPU_IB_TEST_TIMEOUT msecs_to_jiffies(1000)
@@ -168,6 +169,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, ((tmp = amdgpu_sync_get_fence(&job->sched_sync, NULL)) || amdgpu_vm_need_pipeline_sync(ring, job))) { need_pipe_sync = true; + + if (tmp) + trace_amdgpu_ib_pipe_sync(job, tmp); + dma_fence_put(tmp); }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index 11f262f..3cffe53 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h @@ -457,6 +457,30 @@ TRACE_EVENT(amdgpu_bo_move, __entry->new_placement, __entry->bo_size) );
+TRACE_EVENT(amdgpu_ib_pipe_sync, + TP_PROTO(struct amdgpu_job *sched_job, struct dma_fence *fence), + TP_ARGS(sched_job, fence), + TP_STRUCT__entry( + __field(const char *,name) + __field(uint64_t, id) + __field(struct dma_fence *, fence) + __field(uint64_t, ctx) + __field(unsigned, seqno) + ), + + TP_fast_assign( + __entry->name = sched_job->base.sched->name; + __entry->id = sched_job->base.id; + __entry->fence = fence; + __entry->ctx = fence->context; + __entry->seqno = fence->seqno; + ), + TP_printk("job ring=%s, id=%llu, need pipe sync to fence=%p, context=%llu, seq=%u", + __entry->name, __entry->id, + __entry->fence, __entry->ctx, + __entry->seqno) +); + #undef AMDGPU_JOB_GET_TIMELINE_NAME #endif
Am 31.07.2018 um 16:56 schrieb Andrey Grodzovsky:
During debug sessions I encountered a need to trace back a job dependecy a few steps back to the first failing job. This trace helpped me a lot.
Signed-off-by: Andrey Grodzovsky andrey.grodzovsky@amd.com
Reviewed-by: Christian König christian.koenig@amd.com for the series.
drivers/gpu/drm/scheduler/gpu_scheduler.c | 8 ++++++-- drivers/gpu/drm/scheduler/gpu_scheduler_trace.h | 24 ++++++++++++++++++++++++ 2 files changed, 30 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c b/drivers/gpu/drm/scheduler/gpu_scheduler.c index 3f2fc5e..45703b9 100644 --- a/drivers/gpu/drm/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c @@ -506,9 +506,13 @@ drm_sched_entity_pop_job(struct drm_sched_entity *entity) if (!sched_job) return NULL;
- while ((entity->dependency = sched->ops->dependency(sched_job, entity)))
if (drm_sched_entity_add_dependency_cb(entity))
while ((entity->dependency = sched->ops->dependency(sched_job, entity))) {
if (drm_sched_entity_add_dependency_cb(entity)) {
trace_drm_sched_job_wait_dep(sched_job, entity->dependency); return NULL;
}
}
/* skip jobs from entity that marked guilty */ if (entity->guilty && atomic_read(entity->guilty))
diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h b/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h index 4998ad9..1626f39 100644 --- a/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h +++ b/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h @@ -74,6 +74,30 @@ TRACE_EVENT(drm_sched_process_job, TP_printk("fence=%p signaled", __entry->fence) );
+TRACE_EVENT(drm_sched_job_wait_dep,
TP_PROTO(struct drm_sched_job *sched_job, struct dma_fence *fence),
TP_ARGS(sched_job, fence),
TP_STRUCT__entry(
__field(const char *,name)
__field(uint64_t, id)
__field(struct dma_fence *, fence)
__field(uint64_t, ctx)
__field(unsigned, seqno)
),
TP_fast_assign(
__entry->name = sched_job->sched->name;
__entry->id = sched_job->id;
__entry->fence = fence;
__entry->ctx = fence->context;
__entry->seqno = fence->seqno;
),
TP_printk("job ring=%s, id=%llu, depends fence=%p, context=%llu, seq=%u",
__entry->name, __entry->id,
__entry->fence, __entry->ctx,
__entry->seqno)
+);
#endif
/* This part must be outside protection */
On Tue, Jul 31, 2018 at 10:56:53AM -0400, Andrey Grodzovsky wrote:
During debug sessions I encountered a need to trace back a job dependecy a few steps back to the first failing job. This trace helpped me a lot.
Signed-off-by: Andrey Grodzovsky andrey.grodzovsky@amd.com
Series are Reviewed-by: Huang Rui ray.huang@amd.com
drivers/gpu/drm/scheduler/gpu_scheduler.c | 8 ++++++-- drivers/gpu/drm/scheduler/gpu_scheduler_trace.h | 24 ++++++++++++++++++++++++ 2 files changed, 30 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c b/drivers/gpu/drm/scheduler/gpu_scheduler.c index 3f2fc5e..45703b9 100644 --- a/drivers/gpu/drm/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c @@ -506,9 +506,13 @@ drm_sched_entity_pop_job(struct drm_sched_entity *entity) if (!sched_job) return NULL;
- while ((entity->dependency = sched->ops->dependency(sched_job, entity)))
if (drm_sched_entity_add_dependency_cb(entity))
while ((entity->dependency = sched->ops->dependency(sched_job, entity))) {
if (drm_sched_entity_add_dependency_cb(entity)) {
trace_drm_sched_job_wait_dep(sched_job, entity->dependency); return NULL;
}
}
/* skip jobs from entity that marked guilty */ if (entity->guilty && atomic_read(entity->guilty))
diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h b/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h index 4998ad9..1626f39 100644 --- a/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h +++ b/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h @@ -74,6 +74,30 @@ TRACE_EVENT(drm_sched_process_job, TP_printk("fence=%p signaled", __entry->fence) );
+TRACE_EVENT(drm_sched_job_wait_dep,
TP_PROTO(struct drm_sched_job *sched_job, struct dma_fence *fence),
TP_ARGS(sched_job, fence),
TP_STRUCT__entry(
__field(const char *,name)
__field(uint64_t, id)
__field(struct dma_fence *, fence)
__field(uint64_t, ctx)
__field(unsigned, seqno)
),
TP_fast_assign(
__entry->name = sched_job->sched->name;
__entry->id = sched_job->id;
__entry->fence = fence;
__entry->ctx = fence->context;
__entry->seqno = fence->seqno;
),
TP_printk("job ring=%s, id=%llu, depends fence=%p, context=%llu, seq=%u",
__entry->name, __entry->id,
__entry->fence, __entry->ctx,
__entry->seqno)
+);
#endif
/* This part must be outside protection */
2.7.4
amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
dri-devel@lists.freedesktop.org