From: Christian König christian.koenig@amd.com
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/radeon/radeon_semaphore.c | 6 +++++- drivers/gpu/drm/radeon/radeon_trace.h | 36 +++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/radeon/radeon_semaphore.c b/drivers/gpu/drm/radeon/radeon_semaphore.c index 8dcc20f..97d73bf 100644 --- a/drivers/gpu/drm/radeon/radeon_semaphore.c +++ b/drivers/gpu/drm/radeon/radeon_semaphore.c @@ -29,7 +29,7 @@ */ #include <drm/drmP.h> #include "radeon.h" - +#include "radeon_trace.h"
int radeon_semaphore_create(struct radeon_device *rdev, struct radeon_semaphore **semaphore) @@ -56,6 +56,8 @@ int radeon_semaphore_create(struct radeon_device *rdev, void radeon_semaphore_emit_signal(struct radeon_device *rdev, int ring, struct radeon_semaphore *semaphore) { + trace_radeon_semaphore_signale(ring, semaphore); + --semaphore->waiters; radeon_semaphore_ring_emit(rdev, ring, &rdev->ring[ring], semaphore, false); } @@ -63,6 +65,8 @@ void radeon_semaphore_emit_signal(struct radeon_device *rdev, int ring, void radeon_semaphore_emit_wait(struct radeon_device *rdev, int ring, struct radeon_semaphore *semaphore) { + trace_radeon_semaphore_wait(ring, semaphore); + ++semaphore->waiters; radeon_semaphore_ring_emit(rdev, ring, &rdev->ring[ring], semaphore, true); } diff --git a/drivers/gpu/drm/radeon/radeon_trace.h b/drivers/gpu/drm/radeon/radeon_trace.h index 811bca6..9f0e181 100644 --- a/drivers/gpu/drm/radeon/radeon_trace.h +++ b/drivers/gpu/drm/radeon/radeon_trace.h @@ -111,6 +111,42 @@ DEFINE_EVENT(radeon_fence_request, radeon_fence_wait_end, TP_ARGS(dev, seqno) );
+DECLARE_EVENT_CLASS(radeon_semaphore_request, + + TP_PROTO(int ring, struct radeon_semaphore *sem), + + TP_ARGS(ring, sem), + + TP_STRUCT__entry( + __field(int, ring) + __field(signed, waiters) + __field(uint64_t, gpu_addr) + ), + + TP_fast_assign( + __entry->ring = ring; + __entry->waiters = sem->waiters; + __entry->gpu_addr = sem->gpu_addr; + ), + + TP_printk("ring=%u, waiters=%d, addr=%010Lx", __entry->ring, + __entry->waiters, __entry->gpu_addr) +); + +DEFINE_EVENT(radeon_semaphore_request, radeon_semaphore_signale, + + TP_PROTO(int ring, struct radeon_semaphore *sem), + + TP_ARGS(ring, sem) +); + +DEFINE_EVENT(radeon_semaphore_request, radeon_semaphore_wait, + + TP_PROTO(int ring, struct radeon_semaphore *sem), + + TP_ARGS(ring, sem) +); + #endif
/* This part must be outside protection */
From: Christian König christian.koenig@amd.com
To workaround bugs and/or certain limits it's sometimes useful to fall back to waiting on fences.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/radeon/cik.c | 13 ++- drivers/gpu/drm/radeon/cik_sdma.c | 13 ++- drivers/gpu/drm/radeon/evergreen_dma.c | 9 +-- drivers/gpu/drm/radeon/r100.c | 3 +- drivers/gpu/drm/radeon/r600.c | 13 ++- drivers/gpu/drm/radeon/r600_dma.c | 13 ++- drivers/gpu/drm/radeon/radeon.h | 14 ++-- drivers/gpu/drm/radeon/radeon_asic.h | 18 ++--- drivers/gpu/drm/radeon/radeon_cs.c | 9 ++- drivers/gpu/drm/radeon/radeon_fence.c | 30 +++++++ drivers/gpu/drm/radeon/radeon_gart.c | 4 +- drivers/gpu/drm/radeon/radeon_ring.c | 46 +++-------- drivers/gpu/drm/radeon/radeon_semaphore.c | 127 ++++++++++++++++++++++-------- drivers/gpu/drm/radeon/rv770_dma.c | 9 +-- drivers/gpu/drm/radeon/si_dma.c | 9 +-- drivers/gpu/drm/radeon/uvd_v1_0.c | 4 +- drivers/gpu/drm/radeon/uvd_v3_1.c | 4 +- 17 files changed, 191 insertions(+), 147 deletions(-)
diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index ae92aa0..811fc1b 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -3556,7 +3556,7 @@ void cik_fence_compute_ring_emit(struct radeon_device *rdev, radeon_ring_write(ring, 0); }
-void cik_semaphore_ring_emit(struct radeon_device *rdev, +bool cik_semaphore_ring_emit(struct radeon_device *rdev, struct radeon_ring *ring, struct radeon_semaphore *semaphore, bool emit_wait) @@ -3567,6 +3567,8 @@ void cik_semaphore_ring_emit(struct radeon_device *rdev, radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1)); radeon_ring_write(ring, addr & 0xffffffff); radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel); + + return true; }
/** @@ -3609,13 +3611,8 @@ int cik_copy_cpdma(struct radeon_device *rdev, return r; }
- if (radeon_fence_need_sync(*fence, ring->idx)) { - radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring, - ring->idx); - radeon_fence_note_sync(*fence, ring->idx); - } else { - radeon_semaphore_free(rdev, &sem, NULL); - } + radeon_semaphore_sync_to(sem, *fence); + radeon_semaphore_sync_rings(rdev, sem, ring->idx);
for (i = 0; i < num_loops; i++) { cur_size_in_bytes = size_in_bytes; diff --git a/drivers/gpu/drm/radeon/cik_sdma.c b/drivers/gpu/drm/radeon/cik_sdma.c index 9c9529d..0300727 100644 --- a/drivers/gpu/drm/radeon/cik_sdma.c +++ b/drivers/gpu/drm/radeon/cik_sdma.c @@ -130,7 +130,7 @@ void cik_sdma_fence_ring_emit(struct radeon_device *rdev, * Add a DMA semaphore packet to the ring wait on or signal * other rings (CIK). */ -void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev, +bool cik_sdma_semaphore_ring_emit(struct radeon_device *rdev, struct radeon_ring *ring, struct radeon_semaphore *semaphore, bool emit_wait) @@ -141,6 +141,8 @@ void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev, radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra_bits)); radeon_ring_write(ring, addr & 0xfffffff8); radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff); + + return true; }
/** @@ -443,13 +445,8 @@ int cik_copy_dma(struct radeon_device *rdev, return r; }
- if (radeon_fence_need_sync(*fence, ring->idx)) { - radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring, - ring->idx); - radeon_fence_note_sync(*fence, ring->idx); - } else { - radeon_semaphore_free(rdev, &sem, NULL); - } + radeon_semaphore_sync_to(sem, *fence); + radeon_semaphore_sync_rings(rdev, sem, ring->idx);
for (i = 0; i < num_loops; i++) { cur_size_in_bytes = size_in_bytes; diff --git a/drivers/gpu/drm/radeon/evergreen_dma.c b/drivers/gpu/drm/radeon/evergreen_dma.c index 6a0656d..a37b544 100644 --- a/drivers/gpu/drm/radeon/evergreen_dma.c +++ b/drivers/gpu/drm/radeon/evergreen_dma.c @@ -131,13 +131,8 @@ int evergreen_copy_dma(struct radeon_device *rdev, return r; }
- if (radeon_fence_need_sync(*fence, ring->idx)) { - radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring, - ring->idx); - radeon_fence_note_sync(*fence, ring->idx); - } else { - radeon_semaphore_free(rdev, &sem, NULL); - } + radeon_semaphore_sync_to(sem, *fence); + radeon_semaphore_sync_rings(rdev, sem, ring->idx);
for (i = 0; i < num_loops; i++) { cur_size_in_dw = size_in_dw; diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index 784983d..10abc4d 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -869,13 +869,14 @@ void r100_fence_ring_emit(struct radeon_device *rdev, radeon_ring_write(ring, RADEON_SW_INT_FIRE); }
-void r100_semaphore_ring_emit(struct radeon_device *rdev, +bool r100_semaphore_ring_emit(struct radeon_device *rdev, struct radeon_ring *ring, struct radeon_semaphore *semaphore, bool emit_wait) { /* Unused on older asics, since we don't have semaphores or multiple rings */ BUG(); + return false; }
int r100_copy_blit(struct radeon_device *rdev, diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index 4e609e8..9ad0673 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -2650,7 +2650,7 @@ void r600_fence_ring_emit(struct radeon_device *rdev, } }
-void r600_semaphore_ring_emit(struct radeon_device *rdev, +bool r600_semaphore_ring_emit(struct radeon_device *rdev, struct radeon_ring *ring, struct radeon_semaphore *semaphore, bool emit_wait) @@ -2664,6 +2664,8 @@ void r600_semaphore_ring_emit(struct radeon_device *rdev, radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1)); radeon_ring_write(ring, addr & 0xffffffff); radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | sel); + + return true; }
/** @@ -2706,13 +2708,8 @@ int r600_copy_cpdma(struct radeon_device *rdev, return r; }
- if (radeon_fence_need_sync(*fence, ring->idx)) { - radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring, - ring->idx); - radeon_fence_note_sync(*fence, ring->idx); - } else { - radeon_semaphore_free(rdev, &sem, NULL); - } + radeon_semaphore_sync_to(sem, *fence); + radeon_semaphore_sync_rings(rdev, sem, ring->idx);
radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); radeon_ring_write(ring, (WAIT_UNTIL - PACKET3_SET_CONFIG_REG_OFFSET) >> 2); diff --git a/drivers/gpu/drm/radeon/r600_dma.c b/drivers/gpu/drm/radeon/r600_dma.c index 3b31745..7844d15 100644 --- a/drivers/gpu/drm/radeon/r600_dma.c +++ b/drivers/gpu/drm/radeon/r600_dma.c @@ -311,7 +311,7 @@ void r600_dma_fence_ring_emit(struct radeon_device *rdev, * Add a DMA semaphore packet to the ring wait on or signal * other rings (r6xx-SI). */ -void r600_dma_semaphore_ring_emit(struct radeon_device *rdev, +bool r600_dma_semaphore_ring_emit(struct radeon_device *rdev, struct radeon_ring *ring, struct radeon_semaphore *semaphore, bool emit_wait) @@ -322,6 +322,8 @@ void r600_dma_semaphore_ring_emit(struct radeon_device *rdev, radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SEMAPHORE, 0, s, 0)); radeon_ring_write(ring, addr & 0xfffffffc); radeon_ring_write(ring, upper_32_bits(addr) & 0xff); + + return true; }
/** @@ -462,13 +464,8 @@ int r600_copy_dma(struct radeon_device *rdev, return r; }
- if (radeon_fence_need_sync(*fence, ring->idx)) { - radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring, - ring->idx); - radeon_fence_note_sync(*fence, ring->idx); - } else { - radeon_semaphore_free(rdev, &sem, NULL); - } + radeon_semaphore_sync_to(sem, *fence); + radeon_semaphore_sync_rings(rdev, sem, ring->idx);
for (i = 0; i < num_loops; i++) { cur_size_in_dw = size_in_dw; diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index b9ee992..2fe2f63 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -348,6 +348,7 @@ int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence **fence, i void radeon_fence_process(struct radeon_device *rdev, int ring); bool radeon_fence_signaled(struct radeon_fence *fence); int radeon_fence_wait(struct radeon_fence *fence, bool interruptible); +int radeon_fence_wait_locked(struct radeon_fence *fence); int radeon_fence_wait_next_locked(struct radeon_device *rdev, int ring); int radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring); int radeon_fence_wait_any(struct radeon_device *rdev, @@ -548,17 +549,20 @@ struct radeon_semaphore { struct radeon_sa_bo *sa_bo; signed waiters; uint64_t gpu_addr; + struct radeon_fence *sync_to[RADEON_NUM_RINGS]; };
int radeon_semaphore_create(struct radeon_device *rdev, struct radeon_semaphore **semaphore); -void radeon_semaphore_emit_signal(struct radeon_device *rdev, int ring, +bool radeon_semaphore_emit_signal(struct radeon_device *rdev, int ring, struct radeon_semaphore *semaphore); -void radeon_semaphore_emit_wait(struct radeon_device *rdev, int ring, +bool radeon_semaphore_emit_wait(struct radeon_device *rdev, int ring, struct radeon_semaphore *semaphore); +void radeon_semaphore_sync_to(struct radeon_semaphore *semaphore, + struct radeon_fence *fence); int radeon_semaphore_sync_rings(struct radeon_device *rdev, struct radeon_semaphore *semaphore, - int signaler, int waiter); + int waiting_ring); void radeon_semaphore_free(struct radeon_device *rdev, struct radeon_semaphore **semaphore, struct radeon_fence *fence); @@ -765,7 +769,6 @@ struct radeon_ib { struct radeon_fence *fence; struct radeon_vm *vm; bool is_const_ib; - struct radeon_fence *sync_to[RADEON_NUM_RINGS]; struct radeon_semaphore *semaphore; };
@@ -921,7 +924,6 @@ int radeon_ib_get(struct radeon_device *rdev, int ring, struct radeon_ib *ib, struct radeon_vm *vm, unsigned size); void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib *ib); -void radeon_ib_sync_to(struct radeon_ib *ib, struct radeon_fence *fence); int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib, struct radeon_ib *const_ib); int radeon_ib_pool_init(struct radeon_device *rdev); @@ -1638,7 +1640,7 @@ struct radeon_asic_ring { /* command emmit functions */ void (*ib_execute)(struct radeon_device *rdev, struct radeon_ib *ib); void (*emit_fence)(struct radeon_device *rdev, struct radeon_fence *fence); - void (*emit_semaphore)(struct radeon_device *rdev, struct radeon_ring *cp, + bool (*emit_semaphore)(struct radeon_device *rdev, struct radeon_ring *cp, struct radeon_semaphore *semaphore, bool emit_wait); void (*vm_flush)(struct radeon_device *rdev, int ridx, struct radeon_vm *vm);
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index f2833ee..c9fd97b 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -80,7 +80,7 @@ int r100_irq_set(struct radeon_device *rdev); int r100_irq_process(struct radeon_device *rdev); void r100_fence_ring_emit(struct radeon_device *rdev, struct radeon_fence *fence); -void r100_semaphore_ring_emit(struct radeon_device *rdev, +bool r100_semaphore_ring_emit(struct radeon_device *rdev, struct radeon_ring *cp, struct radeon_semaphore *semaphore, bool emit_wait); @@ -313,13 +313,13 @@ int r600_cs_parse(struct radeon_cs_parser *p); int r600_dma_cs_parse(struct radeon_cs_parser *p); void r600_fence_ring_emit(struct radeon_device *rdev, struct radeon_fence *fence); -void r600_semaphore_ring_emit(struct radeon_device *rdev, +bool r600_semaphore_ring_emit(struct radeon_device *rdev, struct radeon_ring *cp, struct radeon_semaphore *semaphore, bool emit_wait); void r600_dma_fence_ring_emit(struct radeon_device *rdev, struct radeon_fence *fence); -void r600_dma_semaphore_ring_emit(struct radeon_device *rdev, +bool r600_dma_semaphore_ring_emit(struct radeon_device *rdev, struct radeon_ring *ring, struct radeon_semaphore *semaphore, bool emit_wait); @@ -566,10 +566,6 @@ int sumo_dpm_force_performance_level(struct radeon_device *rdev, */ void cayman_fence_ring_emit(struct radeon_device *rdev, struct radeon_fence *fence); -void cayman_uvd_semaphore_emit(struct radeon_device *rdev, - struct radeon_ring *ring, - struct radeon_semaphore *semaphore, - bool emit_wait); void cayman_pcie_gart_tlb_flush(struct radeon_device *rdev); int cayman_init(struct radeon_device *rdev); void cayman_fini(struct radeon_device *rdev); @@ -697,7 +693,7 @@ void cik_pciep_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v); int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk); void cik_sdma_fence_ring_emit(struct radeon_device *rdev, struct radeon_fence *fence); -void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev, +bool cik_sdma_semaphore_ring_emit(struct radeon_device *rdev, struct radeon_ring *ring, struct radeon_semaphore *semaphore, bool emit_wait); @@ -717,7 +713,7 @@ void cik_fence_gfx_ring_emit(struct radeon_device *rdev, struct radeon_fence *fence); void cik_fence_compute_ring_emit(struct radeon_device *rdev, struct radeon_fence *fence); -void cik_semaphore_ring_emit(struct radeon_device *rdev, +bool cik_semaphore_ring_emit(struct radeon_device *rdev, struct radeon_ring *cp, struct radeon_semaphore *semaphore, bool emit_wait); @@ -807,7 +803,7 @@ void uvd_v1_0_stop(struct radeon_device *rdev);
int uvd_v1_0_ring_test(struct radeon_device *rdev, struct radeon_ring *ring); int uvd_v1_0_ib_test(struct radeon_device *rdev, struct radeon_ring *ring); -void uvd_v1_0_semaphore_emit(struct radeon_device *rdev, +bool uvd_v1_0_semaphore_emit(struct radeon_device *rdev, struct radeon_ring *ring, struct radeon_semaphore *semaphore, bool emit_wait); @@ -819,7 +815,7 @@ void uvd_v2_2_fence_emit(struct radeon_device *rdev, struct radeon_fence *fence);
/* uvd v3.1 */ -void uvd_v3_1_semaphore_emit(struct radeon_device *rdev, +bool uvd_v3_1_semaphore_emit(struct radeon_device *rdev, struct radeon_ring *ring, struct radeon_semaphore *semaphore, bool emit_wait); diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index 26ca223..f41594b 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -159,7 +159,8 @@ static void radeon_cs_sync_rings(struct radeon_cs_parser *p) if (!p->relocs[i].robj) continue;
- radeon_ib_sync_to(&p->ib, p->relocs[i].robj->tbo.sync_obj); + radeon_semaphore_sync_to(p->ib.semaphore, + p->relocs[i].robj->tbo.sync_obj); } }
@@ -411,9 +412,9 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev, goto out; } radeon_cs_sync_rings(parser); - radeon_ib_sync_to(&parser->ib, vm->fence); - radeon_ib_sync_to(&parser->ib, radeon_vm_grab_id( - rdev, vm, parser->ring)); + radeon_semaphore_sync_to(parser->ib.semaphore, vm->fence); + radeon_semaphore_sync_to(parser->ib.semaphore, + radeon_vm_grab_id(rdev, vm, parser->ring));
if ((rdev->family >= CHIP_TAHITI) && (parser->chunk_const_ib_idx != -1)) { diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c index 281d14c..d3a86e4 100644 --- a/drivers/gpu/drm/radeon/radeon_fence.c +++ b/drivers/gpu/drm/radeon/radeon_fence.c @@ -472,6 +472,36 @@ int radeon_fence_wait_any(struct radeon_device *rdev, }
/** + * radeon_fence_wait_locked - wait for a fence to signal + * + * @fence: radeon fence object + * + * Wait for the requested fence to signal (all asics). + * Returns 0 if the fence has passed, error for all other cases. + */ +int radeon_fence_wait_locked(struct radeon_fence *fence) +{ + uint64_t seq[RADEON_NUM_RINGS] = {}; + int r; + + if (fence == NULL) { + WARN(1, "Querying an invalid fence : %p !\n", fence); + return -EINVAL; + } + + seq[fence->ring] = fence->seq; + if (seq[fence->ring] == RADEON_FENCE_SIGNALED_SEQ) + return 0; + + r = radeon_fence_wait_seq(fence->rdev, seq, false, false); + if (r) + return r; + + fence->seq = RADEON_FENCE_SIGNALED_SEQ; + return 0; +} + +/** * radeon_fence_wait_next_locked - wait for the next fence to signal * * @rdev: radeon device pointer diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c index 8a83b89..cd7489b 100644 --- a/drivers/gpu/drm/radeon/radeon_gart.c +++ b/drivers/gpu/drm/radeon/radeon_gart.c @@ -651,7 +651,7 @@ retry: radeon_asic_vm_set_page(rdev, &ib, vm->pd_gpu_addr, 0, pd_entries, 0, 0);
- radeon_ib_sync_to(&ib, vm->fence); + radeon_semaphore_sync_to(ib.semaphore, vm->fence); r = radeon_ib_schedule(rdev, &ib, NULL); if (r) { radeon_ib_free(rdev, &ib); @@ -1220,7 +1220,7 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev, radeon_vm_update_ptes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset, addr, radeon_vm_page_flags(bo_va->flags));
- radeon_ib_sync_to(&ib, vm->fence); + radeon_semaphore_sync_to(ib.semaphore, vm->fence); r = radeon_ib_schedule(rdev, &ib, NULL); if (r) { radeon_ib_free(rdev, &ib); diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index 18254e1..9214403 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -61,7 +61,7 @@ int radeon_ib_get(struct radeon_device *rdev, int ring, struct radeon_ib *ib, struct radeon_vm *vm, unsigned size) { - int i, r; + int r;
r = radeon_sa_bo_new(rdev, &rdev->ring_tmp_bo, &ib->sa_bo, size, 256, true); if (r) { @@ -87,8 +87,6 @@ int radeon_ib_get(struct radeon_device *rdev, int ring, ib->gpu_addr = radeon_sa_bo_gpu_addr(ib->sa_bo); } ib->is_const_ib = false; - for (i = 0; i < RADEON_NUM_RINGS; ++i) - ib->sync_to[i] = NULL;
return 0; } @@ -109,25 +107,6 @@ void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib *ib) }
/** - * radeon_ib_sync_to - sync to fence before executing the IB - * - * @ib: IB object to add fence to - * @fence: fence to sync to - * - * Sync to the fence before executing the IB - */ -void radeon_ib_sync_to(struct radeon_ib *ib, struct radeon_fence *fence) -{ - struct radeon_fence *other; - - if (!fence) - return; - - other = ib->sync_to[fence->ring]; - ib->sync_to[fence->ring] = radeon_fence_later(fence, other); -} - -/** * radeon_ib_schedule - schedule an IB (Indirect Buffer) on the ring * * @rdev: radeon_device pointer @@ -151,8 +130,7 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib, struct radeon_ib *const_ib) { struct radeon_ring *ring = &rdev->ring[ib->ring]; - bool need_sync = false; - int i, r = 0; + int r = 0;
if (!ib->length_dw || !ring->ready) { /* TODO: Nothings in the ib we should report. */ @@ -166,19 +144,15 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib, dev_err(rdev->dev, "scheduling IB failed (%d).\n", r); return r; } - for (i = 0; i < RADEON_NUM_RINGS; ++i) { - struct radeon_fence *fence = ib->sync_to[i]; - if (radeon_fence_need_sync(fence, ib->ring)) { - need_sync = true; - radeon_semaphore_sync_rings(rdev, ib->semaphore, - fence->ring, ib->ring); - radeon_fence_note_sync(fence, ib->ring); - } - } - /* immediately free semaphore when we don't need to sync */ - if (!need_sync) { - radeon_semaphore_free(rdev, &ib->semaphore, NULL); + + /* sync with other rings */ + r = radeon_semaphore_sync_rings(rdev, ib->semaphore, ib->ring); + if (r) { + dev_err(rdev->dev, "failed to sync rings (%d)\n", r); + radeon_ring_unlock_undo(rdev, ring); + return r; } + /* if we can't remember our last VM flush then flush now! */ /* XXX figure out why we have to flush for every IB */ if (ib->vm /*&& !ib->vm->last_flush*/) { diff --git a/drivers/gpu/drm/radeon/radeon_semaphore.c b/drivers/gpu/drm/radeon/radeon_semaphore.c index 97d73bf..2b42aa1 100644 --- a/drivers/gpu/drm/radeon/radeon_semaphore.c +++ b/drivers/gpu/drm/radeon/radeon_semaphore.c @@ -34,7 +34,7 @@ int radeon_semaphore_create(struct radeon_device *rdev, struct radeon_semaphore **semaphore) { - int r; + int i, r;
*semaphore = kmalloc(sizeof(struct radeon_semaphore), GFP_KERNEL); if (*semaphore == NULL) { @@ -50,58 +50,121 @@ int radeon_semaphore_create(struct radeon_device *rdev, (*semaphore)->waiters = 0; (*semaphore)->gpu_addr = radeon_sa_bo_gpu_addr((*semaphore)->sa_bo); *((uint64_t*)radeon_sa_bo_cpu_addr((*semaphore)->sa_bo)) = 0; + + for (i = 0; i < RADEON_NUM_RINGS; ++i) + (*semaphore)->sync_to[i] = NULL; + return 0; }
-void radeon_semaphore_emit_signal(struct radeon_device *rdev, int ring, +bool radeon_semaphore_emit_signal(struct radeon_device *rdev, int ridx, struct radeon_semaphore *semaphore) { - trace_radeon_semaphore_signale(ring, semaphore); + struct radeon_ring *ring = &rdev->ring[ridx]; + + trace_radeon_semaphore_signale(ridx, semaphore); + + if (radeon_semaphore_ring_emit(rdev, ridx, ring, semaphore, false)) { + --semaphore->waiters;
- --semaphore->waiters; - radeon_semaphore_ring_emit(rdev, ring, &rdev->ring[ring], semaphore, false); + /* for debugging lockup only, used by sysfs debug files */ + ring->last_semaphore_signal_addr = semaphore->gpu_addr; + return true; + } + return false; }
-void radeon_semaphore_emit_wait(struct radeon_device *rdev, int ring, +bool radeon_semaphore_emit_wait(struct radeon_device *rdev, int ridx, struct radeon_semaphore *semaphore) { - trace_radeon_semaphore_wait(ring, semaphore); + struct radeon_ring *ring = &rdev->ring[ridx]; + + trace_radeon_semaphore_wait(ridx, semaphore); + + if (radeon_semaphore_ring_emit(rdev, ridx, ring, semaphore, true)) { + ++semaphore->waiters;
- ++semaphore->waiters; - radeon_semaphore_ring_emit(rdev, ring, &rdev->ring[ring], semaphore, true); + /* for debugging lockup only, used by sysfs debug files */ + ring->last_semaphore_wait_addr = semaphore->gpu_addr; + return true; + } + return false; }
-/* caller must hold ring lock */ +/** + * radeon_semaphore_sync_to - use the semaphore to sync to a fence + * + * @semaphore: semaphore object to add fence to + * @fence: fence to sync to + * + * Sync to the fence using this semaphore object + */ +void radeon_semaphore_sync_to(struct radeon_semaphore *semaphore, + struct radeon_fence *fence) +{ + struct radeon_fence *other; + + if (!fence) + return; + + other = semaphore->sync_to[fence->ring]; + semaphore->sync_to[fence->ring] = radeon_fence_later(fence, other); +} + +/** + * radeon_semaphore_sync_rings - sync ring to all registered fences + * + * @rdev: radeon_device pointer + * @semaphore: semaphore object to use for sync + * @ring: ring that needs sync + * + * Ensure that all registered fences are signaled before letting + * the ring continue. The caller must hold the ring lock. + */ int radeon_semaphore_sync_rings(struct radeon_device *rdev, struct radeon_semaphore *semaphore, - int signaler, int waiter) + int ring) { - int r; + int i, r;
- /* no need to signal and wait on the same ring */ - if (signaler == waiter) { - return 0; - } + for (i = 0; i < RADEON_NUM_RINGS; ++i) { + struct radeon_fence *fence = semaphore->sync_to[i];
- /* prevent GPU deadlocks */ - if (!rdev->ring[signaler].ready) { - dev_err(rdev->dev, "Trying to sync to a disabled ring!"); - return -EINVAL; - } + /* check if we really need to sync */ + if (!radeon_fence_need_sync(fence, ring)) + continue;
- r = radeon_ring_alloc(rdev, &rdev->ring[signaler], 8); - if (r) { - return r; - } - radeon_semaphore_emit_signal(rdev, signaler, semaphore); - radeon_ring_commit(rdev, &rdev->ring[signaler]); + /* prevent GPU deadlocks */ + if (!rdev->ring[i].ready) { + dev_err(rdev->dev, "Syncing to a disabled ring!"); + return -EINVAL; + }
- /* we assume caller has already allocated space on waiters ring */ - radeon_semaphore_emit_wait(rdev, waiter, semaphore); + /* allocate enough space for sync command */ + r = radeon_ring_alloc(rdev, &rdev->ring[i], 16); + if (r) { + return r; + }
- /* for debugging lockup only, used by sysfs debug files */ - rdev->ring[signaler].last_semaphore_signal_addr = semaphore->gpu_addr; - rdev->ring[waiter].last_semaphore_wait_addr = semaphore->gpu_addr; + /* emit the signal semaphore */ + if (!radeon_semaphore_emit_signal(rdev, i, semaphore)) { + /* signaling wasn't successful wait manually */ + radeon_ring_undo(&rdev->ring[i]); + radeon_fence_wait_locked(fence); + continue; + } + + /* we assume caller has already allocated space on waiters ring */ + if (!radeon_semaphore_emit_wait(rdev, ring, semaphore)) { + /* waiting wasn't successful wait manually */ + radeon_ring_undo(&rdev->ring[i]); + radeon_fence_wait_locked(fence); + continue; + } + + radeon_ring_commit(rdev, &rdev->ring[i]); + radeon_fence_note_sync(fence, ring); + }
return 0; } diff --git a/drivers/gpu/drm/radeon/rv770_dma.c b/drivers/gpu/drm/radeon/rv770_dma.c index f9b02e3..aca8cbe 100644 --- a/drivers/gpu/drm/radeon/rv770_dma.c +++ b/drivers/gpu/drm/radeon/rv770_dma.c @@ -66,13 +66,8 @@ int rv770_copy_dma(struct radeon_device *rdev, return r; }
- if (radeon_fence_need_sync(*fence, ring->idx)) { - radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring, - ring->idx); - radeon_fence_note_sync(*fence, ring->idx); - } else { - radeon_semaphore_free(rdev, &sem, NULL); - } + radeon_semaphore_sync_to(sem, *fence); + radeon_semaphore_sync_rings(rdev, sem, ring->idx);
for (i = 0; i < num_loops; i++) { cur_size_in_dw = size_in_dw; diff --git a/drivers/gpu/drm/radeon/si_dma.c b/drivers/gpu/drm/radeon/si_dma.c index 8e8f461..59be2cf 100644 --- a/drivers/gpu/drm/radeon/si_dma.c +++ b/drivers/gpu/drm/radeon/si_dma.c @@ -195,13 +195,8 @@ int si_copy_dma(struct radeon_device *rdev, return r; }
- if (radeon_fence_need_sync(*fence, ring->idx)) { - radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring, - ring->idx); - radeon_fence_note_sync(*fence, ring->idx); - } else { - radeon_semaphore_free(rdev, &sem, NULL); - } + radeon_semaphore_sync_to(sem, *fence); + radeon_semaphore_sync_rings(rdev, sem, ring->idx);
for (i = 0; i < num_loops; i++) { cur_size_in_bytes = size_in_bytes; diff --git a/drivers/gpu/drm/radeon/uvd_v1_0.c b/drivers/gpu/drm/radeon/uvd_v1_0.c index 7266805..d4a68af 100644 --- a/drivers/gpu/drm/radeon/uvd_v1_0.c +++ b/drivers/gpu/drm/radeon/uvd_v1_0.c @@ -357,7 +357,7 @@ int uvd_v1_0_ring_test(struct radeon_device *rdev, struct radeon_ring *ring) * * Emit a semaphore command (either wait or signal) to the UVD ring. */ -void uvd_v1_0_semaphore_emit(struct radeon_device *rdev, +bool uvd_v1_0_semaphore_emit(struct radeon_device *rdev, struct radeon_ring *ring, struct radeon_semaphore *semaphore, bool emit_wait) @@ -372,6 +372,8 @@ void uvd_v1_0_semaphore_emit(struct radeon_device *rdev,
radeon_ring_write(ring, PACKET0(UVD_SEMA_CMD, 0)); radeon_ring_write(ring, emit_wait ? 1 : 0); + + return true; }
/** diff --git a/drivers/gpu/drm/radeon/uvd_v3_1.c b/drivers/gpu/drm/radeon/uvd_v3_1.c index 5b6fa1f..d722db2 100644 --- a/drivers/gpu/drm/radeon/uvd_v3_1.c +++ b/drivers/gpu/drm/radeon/uvd_v3_1.c @@ -37,7 +37,7 @@ * * Emit a semaphore command (either wait or signal) to the UVD ring. */ -void uvd_v3_1_semaphore_emit(struct radeon_device *rdev, +bool uvd_v3_1_semaphore_emit(struct radeon_device *rdev, struct radeon_ring *ring, struct radeon_semaphore *semaphore, bool emit_wait) @@ -52,4 +52,6 @@ void uvd_v3_1_semaphore_emit(struct radeon_device *rdev,
radeon_ring_write(ring, PACKET0(UVD_SEMA_CMD, 0)); radeon_ring_write(ring, 0x80 | (emit_wait ? 1 : 0)); + + return true; }
From: Christian König christian.koenig@amd.com
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/radeon/cik.c | 5 +++++ 1 file changed, 5 insertions(+)
diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 811fc1b..c169897 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -3561,6 +3561,8 @@ bool cik_semaphore_ring_emit(struct radeon_device *rdev, struct radeon_semaphore *semaphore, bool emit_wait) { +/* TODO: figure out why semaphore cause lockups */ +#if 0 uint64_t addr = semaphore->gpu_addr; unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
@@ -3569,6 +3571,9 @@ bool cik_semaphore_ring_emit(struct radeon_device *rdev, radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
return true; +#else + return false; +#endif }
/**
Added the series to my queue for 3.13.
Thanks,
Alex
On Tue, Nov 12, 2013 at 6:58 AM, Christian König deathsimple@vodafone.de wrote:
From: Christian König christian.koenig@amd.com
Signed-off-by: Christian König christian.koenig@amd.com
drivers/gpu/drm/radeon/radeon_semaphore.c | 6 +++++- drivers/gpu/drm/radeon/radeon_trace.h | 36 +++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/radeon/radeon_semaphore.c b/drivers/gpu/drm/radeon/radeon_semaphore.c index 8dcc20f..97d73bf 100644 --- a/drivers/gpu/drm/radeon/radeon_semaphore.c +++ b/drivers/gpu/drm/radeon/radeon_semaphore.c @@ -29,7 +29,7 @@ */ #include <drm/drmP.h> #include "radeon.h"
+#include "radeon_trace.h"
int radeon_semaphore_create(struct radeon_device *rdev, struct radeon_semaphore **semaphore) @@ -56,6 +56,8 @@ int radeon_semaphore_create(struct radeon_device *rdev, void radeon_semaphore_emit_signal(struct radeon_device *rdev, int ring, struct radeon_semaphore *semaphore) {
trace_radeon_semaphore_signale(ring, semaphore);
--semaphore->waiters; radeon_semaphore_ring_emit(rdev, ring, &rdev->ring[ring], semaphore, false);
} @@ -63,6 +65,8 @@ void radeon_semaphore_emit_signal(struct radeon_device *rdev, int ring, void radeon_semaphore_emit_wait(struct radeon_device *rdev, int ring, struct radeon_semaphore *semaphore) {
trace_radeon_semaphore_wait(ring, semaphore);
++semaphore->waiters; radeon_semaphore_ring_emit(rdev, ring, &rdev->ring[ring], semaphore, true);
} diff --git a/drivers/gpu/drm/radeon/radeon_trace.h b/drivers/gpu/drm/radeon/radeon_trace.h index 811bca6..9f0e181 100644 --- a/drivers/gpu/drm/radeon/radeon_trace.h +++ b/drivers/gpu/drm/radeon/radeon_trace.h @@ -111,6 +111,42 @@ DEFINE_EVENT(radeon_fence_request, radeon_fence_wait_end, TP_ARGS(dev, seqno) );
+DECLARE_EVENT_CLASS(radeon_semaphore_request,
TP_PROTO(int ring, struct radeon_semaphore *sem),
TP_ARGS(ring, sem),
TP_STRUCT__entry(
__field(int, ring)
__field(signed, waiters)
__field(uint64_t, gpu_addr)
),
TP_fast_assign(
__entry->ring = ring;
__entry->waiters = sem->waiters;
__entry->gpu_addr = sem->gpu_addr;
),
TP_printk("ring=%u, waiters=%d, addr=%010Lx", __entry->ring,
__entry->waiters, __entry->gpu_addr)
+);
+DEFINE_EVENT(radeon_semaphore_request, radeon_semaphore_signale,
TP_PROTO(int ring, struct radeon_semaphore *sem),
TP_ARGS(ring, sem)
+);
+DEFINE_EVENT(radeon_semaphore_request, radeon_semaphore_wait,
TP_PROTO(int ring, struct radeon_semaphore *sem),
TP_ARGS(ring, sem)
+);
#endif
/* This part must be outside protection */
1.8.1.2
Am 14.11.2013 16:37, schrieb Alex Deucher:
Added the series to my queue for 3.13.
Please drop [PATCH 3/3] drm/radeon: disable CIK CP semaphores for now.
With the recent discovery that's an alignment issue that doesn't seems to be necessary any more.
Christian.
Thanks,
Alex
On Tue, Nov 12, 2013 at 6:58 AM, Christian König deathsimple@vodafone.de wrote:
From: Christian König christian.koenig@amd.com
Signed-off-by: Christian König christian.koenig@amd.com
drivers/gpu/drm/radeon/radeon_semaphore.c | 6 +++++- drivers/gpu/drm/radeon/radeon_trace.h | 36 +++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/radeon/radeon_semaphore.c b/drivers/gpu/drm/radeon/radeon_semaphore.c index 8dcc20f..97d73bf 100644 --- a/drivers/gpu/drm/radeon/radeon_semaphore.c +++ b/drivers/gpu/drm/radeon/radeon_semaphore.c @@ -29,7 +29,7 @@ */ #include <drm/drmP.h> #include "radeon.h"
+#include "radeon_trace.h"
int radeon_semaphore_create(struct radeon_device *rdev, struct radeon_semaphore **semaphore) @@ -56,6 +56,8 @@ int radeon_semaphore_create(struct radeon_device *rdev, void radeon_semaphore_emit_signal(struct radeon_device *rdev, int ring, struct radeon_semaphore *semaphore) {
trace_radeon_semaphore_signale(ring, semaphore);
}--semaphore->waiters; radeon_semaphore_ring_emit(rdev, ring, &rdev->ring[ring], semaphore, false);
@@ -63,6 +65,8 @@ void radeon_semaphore_emit_signal(struct radeon_device *rdev, int ring, void radeon_semaphore_emit_wait(struct radeon_device *rdev, int ring, struct radeon_semaphore *semaphore) {
trace_radeon_semaphore_wait(ring, semaphore);
}++semaphore->waiters; radeon_semaphore_ring_emit(rdev, ring, &rdev->ring[ring], semaphore, true);
diff --git a/drivers/gpu/drm/radeon/radeon_trace.h b/drivers/gpu/drm/radeon/radeon_trace.h index 811bca6..9f0e181 100644 --- a/drivers/gpu/drm/radeon/radeon_trace.h +++ b/drivers/gpu/drm/radeon/radeon_trace.h @@ -111,6 +111,42 @@ DEFINE_EVENT(radeon_fence_request, radeon_fence_wait_end, TP_ARGS(dev, seqno) );
+DECLARE_EVENT_CLASS(radeon_semaphore_request,
TP_PROTO(int ring, struct radeon_semaphore *sem),
TP_ARGS(ring, sem),
TP_STRUCT__entry(
__field(int, ring)
__field(signed, waiters)
__field(uint64_t, gpu_addr)
),
TP_fast_assign(
__entry->ring = ring;
__entry->waiters = sem->waiters;
__entry->gpu_addr = sem->gpu_addr;
),
TP_printk("ring=%u, waiters=%d, addr=%010Lx", __entry->ring,
__entry->waiters, __entry->gpu_addr)
+);
+DEFINE_EVENT(radeon_semaphore_request, radeon_semaphore_signale,
TP_PROTO(int ring, struct radeon_semaphore *sem),
TP_ARGS(ring, sem)
+);
+DEFINE_EVENT(radeon_semaphore_request, radeon_semaphore_wait,
TP_PROTO(int ring, struct radeon_semaphore *sem),
TP_ARGS(ring, sem)
+);
#endif
/* This part must be outside protection */
-- 1.8.1.2
dri-devel@lists.freedesktop.org