From: Christian König christian.koenig@amd.com
Just to be sure that fences we sync to won't be released while accessed.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/radeon/radeon_sync.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/radeon/radeon_sync.c b/drivers/gpu/drm/radeon/radeon_sync.c index 02ac8a1..6fccaaf 100644 --- a/drivers/gpu/drm/radeon/radeon_sync.c +++ b/drivers/gpu/drm/radeon/radeon_sync.c @@ -69,11 +69,15 @@ void radeon_sync_fence(struct radeon_sync *sync, return;
other = sync->sync_to[fence->ring]; - sync->sync_to[fence->ring] = radeon_fence_later(fence, other); + sync->sync_to[fence->ring] = radeon_fence_ref( + radeon_fence_later(fence, other)); + radeon_fence_unref(&other);
if (fence->is_vm_update) { other = sync->last_vm_update; - sync->last_vm_update = radeon_fence_later(fence, other); + sync->last_vm_update = radeon_fence_ref( + radeon_fence_later(fence, other)); + radeon_fence_unref(&other); } }
@@ -217,4 +221,9 @@ void radeon_sync_free(struct radeon_device *rdev,
for (i = 0; i < RADEON_NUM_SYNCS; ++i) radeon_semaphore_free(rdev, &sync->semaphores[i], fence); + + for (i = 0; i < RADEON_NUM_RINGS; ++i) + radeon_fence_unref(&sync->sync_to[i]); + + radeon_fence_unref(&sync->last_vm_update); }
From: Christian König christian.koenig@amd.com
This way we can track who created the fence and then only wait on fences that userspace doesn't know about.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/radeon/cik.c | 8 +++++--- drivers/gpu/drm/radeon/cik_sdma.c | 8 +++++--- drivers/gpu/drm/radeon/evergreen_dma.c | 5 +++-- drivers/gpu/drm/radeon/r100.c | 6 ++++-- drivers/gpu/drm/radeon/r200.c | 3 ++- drivers/gpu/drm/radeon/r600.c | 8 +++++--- drivers/gpu/drm/radeon/r600_dma.c | 8 +++++--- drivers/gpu/drm/radeon/radeon.h | 15 +++++++++++---- drivers/gpu/drm/radeon/radeon_cs.c | 14 +++++++++----- drivers/gpu/drm/radeon/radeon_fence.c | 4 ++-- drivers/gpu/drm/radeon/radeon_ib.c | 5 +++-- drivers/gpu/drm/radeon/radeon_sync.c | 19 +++++++++++-------- drivers/gpu/drm/radeon/radeon_test.c | 3 ++- drivers/gpu/drm/radeon/radeon_uvd.c | 3 ++- drivers/gpu/drm/radeon/radeon_vce.c | 6 ++++-- drivers/gpu/drm/radeon/radeon_vm.c | 18 ++++++++++-------- drivers/gpu/drm/radeon/rv770_dma.c | 5 +++-- drivers/gpu/drm/radeon/si_dma.c | 5 +++-- 18 files changed, 89 insertions(+), 54 deletions(-)
diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 6dcde37..7f15ec5 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -4013,7 +4013,7 @@ struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev, return ERR_PTR(r); }
- radeon_sync_resv(rdev, &sync, resv, false); + radeon_sync_resv(rdev, &sync, resv, RADEON_FENCE_OWNER_UNDEFINED); radeon_sync_rings(rdev, &sync, ring->idx);
for (i = 0; i < num_loops; i++) { @@ -4035,7 +4035,8 @@ struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev, dst_offset += cur_size_in_bytes; }
- r = radeon_fence_emit(rdev, &fence, ring->idx); + r = radeon_fence_emit(rdev, &fence, ring->idx, + RADEON_FENCE_OWNER_MOVE); if (r) { radeon_ring_unlock_undo(rdev, ring); radeon_sync_free(rdev, &sync, NULL); @@ -4141,7 +4142,8 @@ int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2); ib.ptr[2] = 0xDEADBEEF; ib.length_dw = 3; - r = radeon_ib_schedule(rdev, &ib, NULL, false); + r = radeon_ib_schedule(rdev, &ib, NULL, false, + RADEON_FENCE_OWNER_UNDEFINED); if (r) { radeon_scratch_free(rdev, scratch); radeon_ib_free(rdev, &ib); diff --git a/drivers/gpu/drm/radeon/cik_sdma.c b/drivers/gpu/drm/radeon/cik_sdma.c index dde5c7e..2261a88 100644 --- a/drivers/gpu/drm/radeon/cik_sdma.c +++ b/drivers/gpu/drm/radeon/cik_sdma.c @@ -560,7 +560,7 @@ struct radeon_fence *cik_copy_dma(struct radeon_device *rdev, return ERR_PTR(r); }
- radeon_sync_resv(rdev, &sync, resv, false); + radeon_sync_resv(rdev, &sync, resv, RADEON_FENCE_OWNER_UNDEFINED); radeon_sync_rings(rdev, &sync, ring->idx);
for (i = 0; i < num_loops; i++) { @@ -579,7 +579,8 @@ struct radeon_fence *cik_copy_dma(struct radeon_device *rdev, dst_offset += cur_size_in_bytes; }
- r = radeon_fence_emit(rdev, &fence, ring->idx); + r = radeon_fence_emit(rdev, &fence, ring->idx, + RADEON_FENCE_OWNER_MOVE); if (r) { radeon_ring_unlock_undo(rdev, ring); radeon_sync_free(rdev, &sync, NULL); @@ -691,7 +692,8 @@ int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) ib.ptr[4] = 0xDEADBEEF; ib.length_dw = 5;
- r = radeon_ib_schedule(rdev, &ib, NULL, false); + r = radeon_ib_schedule(rdev, &ib, NULL, false, + RADEON_FENCE_OWNER_UNDEFINED); if (r) { radeon_ib_free(rdev, &ib); DRM_ERROR("radeon: failed to schedule ib (%d).\n", r); diff --git a/drivers/gpu/drm/radeon/evergreen_dma.c b/drivers/gpu/drm/radeon/evergreen_dma.c index 96535aa..094df95 100644 --- a/drivers/gpu/drm/radeon/evergreen_dma.c +++ b/drivers/gpu/drm/radeon/evergreen_dma.c @@ -129,7 +129,7 @@ struct radeon_fence *evergreen_copy_dma(struct radeon_device *rdev, return ERR_PTR(r); }
- radeon_sync_resv(rdev, &sync, resv, false); + radeon_sync_resv(rdev, &sync, resv, RADEON_FENCE_OWNER_UNDEFINED); radeon_sync_rings(rdev, &sync, ring->idx);
for (i = 0; i < num_loops; i++) { @@ -146,7 +146,8 @@ struct radeon_fence *evergreen_copy_dma(struct radeon_device *rdev, dst_offset += cur_size_in_dw * 4; }
- r = radeon_fence_emit(rdev, &fence, ring->idx); + r = radeon_fence_emit(rdev, &fence, ring->idx, + RADEON_FENCE_OWNER_MOVE); if (r) { radeon_ring_unlock_undo(rdev, ring); radeon_sync_free(rdev, &sync, NULL); diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index 74f06d5..81388d9 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -937,7 +937,8 @@ struct radeon_fence *r100_copy_blit(struct radeon_device *rdev, RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_HOST_IDLECLEAN | RADEON_WAIT_DMA_GUI_IDLE); - r = radeon_fence_emit(rdev, &fence, RADEON_RING_TYPE_GFX_INDEX); + r = radeon_fence_emit(rdev, &fence, RADEON_RING_TYPE_GFX_INDEX, + RADEON_FENCE_OWNER_UNDEFINED); if (r) { radeon_ring_unlock_undo(rdev, ring); return ERR_PTR(r); @@ -3706,7 +3707,8 @@ int r100_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) ib.ptr[6] = PACKET2(0); ib.ptr[7] = PACKET2(0); ib.length_dw = 8; - r = radeon_ib_schedule(rdev, &ib, NULL, false); + r = radeon_ib_schedule(rdev, &ib, NULL, false, + RADEON_FENCE_OWNER_UNDEFINED); if (r) { DRM_ERROR("radeon: failed to schedule ib (%d).\n", r); goto free_ib; diff --git a/drivers/gpu/drm/radeon/r200.c b/drivers/gpu/drm/radeon/r200.c index c70e6d5..d09fb3f 100644 --- a/drivers/gpu/drm/radeon/r200.c +++ b/drivers/gpu/drm/radeon/r200.c @@ -119,7 +119,8 @@ struct radeon_fence *r200_copy_dma(struct radeon_device *rdev, } radeon_ring_write(ring, PACKET0(RADEON_WAIT_UNTIL, 0)); radeon_ring_write(ring, RADEON_WAIT_DMA_GUI_IDLE); - r = radeon_fence_emit(rdev, &fence, RADEON_RING_TYPE_GFX_INDEX); + r = radeon_fence_emit(rdev, &fence, RADEON_RING_TYPE_GFX_INDEX, + RADEON_FENCE_OWNER_UNDEFINED); if (r) { radeon_ring_unlock_undo(rdev, ring); return ERR_PTR(r); diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index ef5d606..462cc36 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -2908,7 +2908,7 @@ struct radeon_fence *r600_copy_cpdma(struct 
radeon_device *rdev, return ERR_PTR(r); }
- radeon_sync_resv(rdev, &sync, resv, false); + radeon_sync_resv(rdev, &sync, resv, RADEON_FENCE_OWNER_UNDEFINED); radeon_sync_rings(rdev, &sync, ring->idx);
radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); @@ -2935,7 +2935,8 @@ struct radeon_fence *r600_copy_cpdma(struct radeon_device *rdev, radeon_ring_write(ring, (WAIT_UNTIL - PACKET3_SET_CONFIG_REG_OFFSET) >> 2); radeon_ring_write(ring, WAIT_CP_DMA_IDLE_bit);
- r = radeon_fence_emit(rdev, &fence, ring->idx); + r = radeon_fence_emit(rdev, &fence, ring->idx, + RADEON_FENCE_OWNER_MOVE); if (r) { radeon_ring_unlock_undo(rdev, ring); radeon_sync_free(rdev, &sync, NULL); @@ -3302,7 +3303,8 @@ int r600_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) ib.ptr[1] = ((scratch - PACKET3_SET_CONFIG_REG_OFFSET) >> 2); ib.ptr[2] = 0xDEADBEEF; ib.length_dw = 3; - r = radeon_ib_schedule(rdev, &ib, NULL, false); + r = radeon_ib_schedule(rdev, &ib, NULL, false, + RADEON_FENCE_OWNER_UNDEFINED); if (r) { DRM_ERROR("radeon: failed to schedule ib (%d).\n", r); goto free_ib; diff --git a/drivers/gpu/drm/radeon/r600_dma.c b/drivers/gpu/drm/radeon/r600_dma.c index d2dd29a..013f939 100644 --- a/drivers/gpu/drm/radeon/r600_dma.c +++ b/drivers/gpu/drm/radeon/r600_dma.c @@ -362,7 +362,8 @@ int r600_dma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) ib.ptr[3] = 0xDEADBEEF; ib.length_dw = 4;
- r = radeon_ib_schedule(rdev, &ib, NULL, false); + r = radeon_ib_schedule(rdev, &ib, NULL, false, + RADEON_FENCE_OWNER_UNDEFINED); if (r) { radeon_ib_free(rdev, &ib); DRM_ERROR("radeon: failed to schedule ib (%d).\n", r); @@ -460,7 +461,7 @@ struct radeon_fence *r600_copy_dma(struct radeon_device *rdev, return ERR_PTR(r); }
- radeon_sync_resv(rdev, &sync, resv, false); + radeon_sync_resv(rdev, &sync, resv, RADEON_FENCE_OWNER_UNDEFINED); radeon_sync_rings(rdev, &sync, ring->idx);
for (i = 0; i < num_loops; i++) { @@ -477,7 +478,8 @@ struct radeon_fence *r600_copy_dma(struct radeon_device *rdev, dst_offset += cur_size_in_dw * 4; }
- r = radeon_fence_emit(rdev, &fence, ring->idx); + r = radeon_fence_emit(rdev, &fence, ring->idx, + RADEON_FENCE_OWNER_MOVE); if (r) { radeon_ring_unlock_undo(rdev, ring); radeon_sync_free(rdev, &sync, NULL); diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 54529b8..3968f91 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -359,14 +359,20 @@ struct radeon_fence_driver { struct delayed_work lockup_work; };
+/* some special values for the owner field */ +#define RADEON_FENCE_OWNER_UNDEFINED (0ul) +#define RADEON_FENCE_OWNER_VM (1ul) +#define RADEON_FENCE_OWNER_MOVE (2ul) + struct radeon_fence { struct fence base;
struct radeon_device *rdev; uint64_t seq; + /* filp or special value for fence creator */ + long owner; /* RB, DMA, etc. */ unsigned ring; - bool is_vm_update;
wait_queue_t fence_wake; }; @@ -375,7 +381,8 @@ int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring); int radeon_fence_driver_init(struct radeon_device *rdev); void radeon_fence_driver_fini(struct radeon_device *rdev); void radeon_fence_driver_force_completion(struct radeon_device *rdev, int ring); -int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence **fence, int ring); +int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence **fence, + int ring, long owner); void radeon_fence_process(struct radeon_device *rdev, int ring); bool radeon_fence_signaled(struct radeon_fence *fence); int radeon_fence_wait(struct radeon_fence *fence, bool interruptible); @@ -614,7 +621,7 @@ void radeon_sync_fence(struct radeon_sync *sync, int radeon_sync_resv(struct radeon_device *rdev, struct radeon_sync *sync, struct reservation_object *resv, - bool shared); + long owner); int radeon_sync_rings(struct radeon_device *rdev, struct radeon_sync *sync, int waiting_ring); @@ -1015,7 +1022,7 @@ int radeon_ib_get(struct radeon_device *rdev, int ring, unsigned size); void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib *ib); int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib, - struct radeon_ib *const_ib, bool hdp_flush); + struct radeon_ib *const_ib, bool hdp_flush, long owner); int radeon_ib_pool_init(struct radeon_device *rdev); void radeon_ib_pool_fini(struct radeon_device *rdev); int radeon_ib_ring_tests(struct radeon_device *rdev); diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index 9648e28..3c3b7d9 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -237,10 +237,11 @@ static int radeon_cs_sync_rings(struct radeon_cs_parser *p)
list_for_each_entry(reloc, &p->validated, tv.head) { struct reservation_object *resv; + long owner = reloc->tv.shared ? (long)p->filp : + RADEON_FENCE_OWNER_UNDEFINED;
resv = reloc->robj->tbo.resv; - r = radeon_sync_resv(p->rdev, &p->ib.sync, resv, - reloc->tv.shared); + r = radeon_sync_resv(p->rdev, &p->ib.sync, resv, owner);
if (r) return r; @@ -467,7 +468,8 @@ static int radeon_cs_ib_chunk(struct radeon_device *rdev, (parser->ring == TN_RING_TYPE_VCE2_INDEX)) radeon_vce_note_usage(rdev);
- r = radeon_ib_schedule(rdev, &parser->ib, NULL, true); + r = radeon_ib_schedule(rdev, &parser->ib, NULL, true, + (long)parser->filp); if (r) { DRM_ERROR("Failed to schedule IB !\n"); } @@ -561,9 +563,11 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
if ((rdev->family >= CHIP_TAHITI) && (parser->chunk_const_ib != NULL)) { - r = radeon_ib_schedule(rdev, &parser->ib, &parser->const_ib, true); + r = radeon_ib_schedule(rdev, &parser->ib, &parser->const_ib, + true, (long)parser->filp); } else { - r = radeon_ib_schedule(rdev, &parser->ib, NULL, true); + r = radeon_ib_schedule(rdev, &parser->ib, NULL, true, + (long)parser->filp); }
out: diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c index d13d1b5..89e8c5f 100644 --- a/drivers/gpu/drm/radeon/radeon_fence.c +++ b/drivers/gpu/drm/radeon/radeon_fence.c @@ -128,7 +128,7 @@ static void radeon_fence_schedule_check(struct radeon_device *rdev, int ring) */ int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence **fence, - int ring) + int ring, long owner) { u64 seq = ++rdev->fence_drv[ring].sync_seq[ring];
@@ -138,9 +138,9 @@ int radeon_fence_emit(struct radeon_device *rdev, return -ENOMEM; } (*fence)->rdev = rdev; + (*fence)->owner = owner; (*fence)->seq = seq; (*fence)->ring = ring; - (*fence)->is_vm_update = false; fence_init(&(*fence)->base, &radeon_fence_ops, &rdev->fence_queue.lock, rdev->fence_context + ring, seq); radeon_fence_ring_emit(rdev, ring, *fence); diff --git a/drivers/gpu/drm/radeon/radeon_ib.c b/drivers/gpu/drm/radeon/radeon_ib.c index c39ce1f..525416a 100644 --- a/drivers/gpu/drm/radeon/radeon_ib.c +++ b/drivers/gpu/drm/radeon/radeon_ib.c @@ -105,6 +105,7 @@ void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib *ib) * @ib: IB object to schedule * @const_ib: Const IB to schedule (SI only) * @hdp_flush: Whether or not to perform an HDP cache flush + * @owner: owner for creating the fence * * Schedule an IB on the associated ring (all asics). * Returns 0 on success, error on failure. @@ -120,7 +121,7 @@ void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib *ib) * to SI there was just a DE IB. 
*/ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib, - struct radeon_ib *const_ib, bool hdp_flush) + struct radeon_ib *const_ib, bool hdp_flush, long owner) { struct radeon_ring *ring = &rdev->ring[ib->ring]; int r = 0; @@ -162,7 +163,7 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib, radeon_sync_free(rdev, &const_ib->sync, NULL); } radeon_ring_ib_execute(rdev, ib->ring, ib); - r = radeon_fence_emit(rdev, &ib->fence, ib->ring); + r = radeon_fence_emit(rdev, &ib->fence, ib->ring, owner); if (r) { dev_err(rdev->dev, "failed to emit fence for new IB (%d)\n", r); radeon_ring_unlock_undo(rdev, ring); diff --git a/drivers/gpu/drm/radeon/radeon_sync.c b/drivers/gpu/drm/radeon/radeon_sync.c index 6fccaaf..ca98d4b 100644 --- a/drivers/gpu/drm/radeon/radeon_sync.c +++ b/drivers/gpu/drm/radeon/radeon_sync.c @@ -73,7 +73,7 @@ void radeon_sync_fence(struct radeon_sync *sync, radeon_fence_later(fence, other)); radeon_fence_unref(&other);
- if (fence->is_vm_update) { + if (fence->owner == RADEON_FENCE_OWNER_VM) { other = sync->last_vm_update; sync->last_vm_update = radeon_fence_ref( radeon_fence_later(fence, other)); @@ -93,7 +93,7 @@ void radeon_sync_fence(struct radeon_sync *sync, int radeon_sync_resv(struct radeon_device *rdev, struct radeon_sync *sync, struct reservation_object *resv, - bool shared) + long owner) { struct reservation_object_list *flist; struct fence *f; @@ -110,20 +110,23 @@ int radeon_sync_resv(struct radeon_device *rdev, r = fence_wait(f, true);
flist = reservation_object_get_list(resv); - if (shared || !flist || r) + if (!flist || r) return r;
for (i = 0; i < flist->shared_count; ++i) { f = rcu_dereference_protected(flist->shared[i], reservation_object_held(resv)); fence = to_radeon_fence(f); - if (fence && fence->rdev == rdev) - radeon_sync_fence(sync, fence); - else + if (fence && fence->rdev == rdev) { + if (fence->owner != owner || + fence->owner == RADEON_FENCE_OWNER_UNDEFINED) + radeon_sync_fence(sync, fence); + } else { r = fence_wait(f, true);
- if (r) - break; + if (r) + break; + } } return r; } diff --git a/drivers/gpu/drm/radeon/radeon_test.c b/drivers/gpu/drm/radeon/radeon_test.c index 07b506b..5e38b95 100644 --- a/drivers/gpu/drm/radeon/radeon_test.c +++ b/drivers/gpu/drm/radeon/radeon_test.c @@ -298,7 +298,8 @@ static int radeon_test_create_and_emit_fence(struct radeon_device *rdev, DRM_ERROR("Failed to lock ring A %d\n", ring->idx); return r; } - radeon_fence_emit(rdev, fence, ring->idx); + radeon_fence_emit(rdev, fence, ring->idx, + RADEON_FENCE_OWNER_UNDEFINED); radeon_ring_unlock_commit(rdev, ring, false); } return 0; diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c index c10b2ae..1ee9ac3 100644 --- a/drivers/gpu/drm/radeon/radeon_uvd.c +++ b/drivers/gpu/drm/radeon/radeon_uvd.c @@ -671,7 +671,8 @@ static int radeon_uvd_send_msg(struct radeon_device *rdev, ib.ptr[i] = PACKET2(0); ib.length_dw = 16;
- r = radeon_ib_schedule(rdev, &ib, NULL, false); + r = radeon_ib_schedule(rdev, &ib, NULL, false, + RADEON_FENCE_OWNER_UNDEFINED);
if (fence) *fence = radeon_fence_ref(ib.fence); diff --git a/drivers/gpu/drm/radeon/radeon_vce.c b/drivers/gpu/drm/radeon/radeon_vce.c index 976fe43..e64bbcb 100644 --- a/drivers/gpu/drm/radeon/radeon_vce.c +++ b/drivers/gpu/drm/radeon/radeon_vce.c @@ -369,7 +369,8 @@ int radeon_vce_get_create_msg(struct radeon_device *rdev, int ring, for (i = ib.length_dw; i < ib_size_dw; ++i) ib.ptr[i] = 0x0;
- r = radeon_ib_schedule(rdev, &ib, NULL, false); + r = radeon_ib_schedule(rdev, &ib, NULL, false, + RADEON_FENCE_OWNER_UNDEFINED); if (r) { DRM_ERROR("radeon: failed to schedule ib (%d).\n", r); } @@ -426,7 +427,8 @@ int radeon_vce_get_destroy_msg(struct radeon_device *rdev, int ring, for (i = ib.length_dw; i < ib_size_dw; ++i) ib.ptr[i] = 0x0;
- r = radeon_ib_schedule(rdev, &ib, NULL, false); + r = radeon_ib_schedule(rdev, &ib, NULL, false, + RADEON_FENCE_OWNER_UNDEFINED); if (r) { DRM_ERROR("radeon: failed to schedule ib (%d).\n", r); } diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index cde48c4..d9074bb 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -414,11 +414,11 @@ static int radeon_vm_clear_bo(struct radeon_device *rdev, radeon_asic_vm_pad_ib(rdev, &ib); WARN_ON(ib.length_dw > 64);
- r = radeon_ib_schedule(rdev, &ib, NULL, false); + r = radeon_ib_schedule(rdev, &ib, NULL, false, + RADEON_FENCE_OWNER_VM); if (r) goto error_free;
- ib.fence->is_vm_update = true; radeon_bo_fence(bo, ib.fence, false);
error_free: @@ -693,14 +693,15 @@ int radeon_vm_update_page_directory(struct radeon_device *rdev, if (ib.length_dw != 0) { radeon_asic_vm_pad_ib(rdev, &ib);
- radeon_sync_resv(rdev, &ib.sync, pd->tbo.resv, true); + radeon_sync_resv(rdev, &ib.sync, pd->tbo.resv, + RADEON_FENCE_OWNER_VM); WARN_ON(ib.length_dw > ndw); - r = radeon_ib_schedule(rdev, &ib, NULL, false); + r = radeon_ib_schedule(rdev, &ib, NULL, false, + RADEON_FENCE_OWNER_VM); if (r) { radeon_ib_free(rdev, &ib); return r; } - ib.fence->is_vm_update = true; radeon_bo_fence(pd, ib.fence, false); } radeon_ib_free(rdev, &ib); @@ -819,7 +820,8 @@ static int radeon_vm_update_ptes(struct radeon_device *rdev, uint64_t pte; int r;
- radeon_sync_resv(rdev, &ib->sync, pt->tbo.resv, true); + radeon_sync_resv(rdev, &ib->sync, pt->tbo.resv, + RADEON_FENCE_OWNER_VM); r = reservation_object_reserve_shared(pt->tbo.resv); if (r) return r; @@ -1004,12 +1006,12 @@ int radeon_vm_bo_update(struct radeon_device *rdev, radeon_asic_vm_pad_ib(rdev, &ib); WARN_ON(ib.length_dw > ndw);
- r = radeon_ib_schedule(rdev, &ib, NULL, false); + r = radeon_ib_schedule(rdev, &ib, NULL, false, + RADEON_FENCE_OWNER_VM); if (r) { radeon_ib_free(rdev, &ib); return r; } - ib.fence->is_vm_update = true; radeon_vm_fence_pts(vm, bo_va->it.start, bo_va->it.last + 1, ib.fence); radeon_fence_unref(&bo_va->last_pt_update); bo_va->last_pt_update = radeon_fence_ref(ib.fence); diff --git a/drivers/gpu/drm/radeon/rv770_dma.c b/drivers/gpu/drm/radeon/rv770_dma.c index acff6e0..fd274d1 100644 --- a/drivers/gpu/drm/radeon/rv770_dma.c +++ b/drivers/gpu/drm/radeon/rv770_dma.c @@ -63,7 +63,7 @@ struct radeon_fence *rv770_copy_dma(struct radeon_device *rdev, return ERR_PTR(r); }
- radeon_sync_resv(rdev, &sync, resv, false); + radeon_sync_resv(rdev, &sync, resv, RADEON_FENCE_OWNER_UNDEFINED); radeon_sync_rings(rdev, &sync, ring->idx);
for (i = 0; i < num_loops; i++) { @@ -80,7 +80,8 @@ struct radeon_fence *rv770_copy_dma(struct radeon_device *rdev, dst_offset += cur_size_in_dw * 4; }
- r = radeon_fence_emit(rdev, &fence, ring->idx); + r = radeon_fence_emit(rdev, &fence, ring->idx, + RADEON_FENCE_OWNER_MOVE); if (r) { radeon_ring_unlock_undo(rdev, ring); radeon_sync_free(rdev, &sync, NULL); diff --git a/drivers/gpu/drm/radeon/si_dma.c b/drivers/gpu/drm/radeon/si_dma.c index f5cc777..6420a19 100644 --- a/drivers/gpu/drm/radeon/si_dma.c +++ b/drivers/gpu/drm/radeon/si_dma.c @@ -245,7 +245,7 @@ struct radeon_fence *si_copy_dma(struct radeon_device *rdev, return ERR_PTR(r); }
- radeon_sync_resv(rdev, &sync, resv, false); + radeon_sync_resv(rdev, &sync, resv, RADEON_FENCE_OWNER_UNDEFINED); radeon_sync_rings(rdev, &sync, ring->idx);
for (i = 0; i < num_loops; i++) { @@ -262,7 +262,8 @@ struct radeon_fence *si_copy_dma(struct radeon_device *rdev, dst_offset += cur_size_in_bytes; }
- r = radeon_fence_emit(rdev, &fence, ring->idx); + r = radeon_fence_emit(rdev, &fence, ring->idx, + RADEON_FENCE_OWNER_MOVE); if (r) { radeon_ring_unlock_undo(rdev, ring); radeon_sync_free(rdev, &sync, NULL);
From: Christian König christian.koenig@amd.com
This patch adds a new 64bit ID as a result for each command submission.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/radeon/Makefile | 2 +- drivers/gpu/drm/radeon/radeon.h | 13 +- drivers/gpu/drm/radeon/radeon_cs.c | 13 ++ drivers/gpu/drm/radeon/radeon_kms.c | 41 +++---- drivers/gpu/drm/radeon/radeon_seq.c | 229 ++++++++++++++++++++++++++++++++++++ include/uapi/drm/radeon_drm.h | 1 + 6 files changed, 277 insertions(+), 22 deletions(-) create mode 100644 drivers/gpu/drm/radeon/radeon_seq.c
diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile index 12bc212..7145f15 100644 --- a/drivers/gpu/drm/radeon/Makefile +++ b/drivers/gpu/drm/radeon/Makefile @@ -81,7 +81,7 @@ radeon-y += radeon_device.o radeon_asic.o radeon_kms.o \ rv770_smc.o cypress_dpm.o btc_dpm.o sumo_dpm.o sumo_smc.o trinity_dpm.o \ trinity_smc.o ni_dpm.o si_smc.o si_dpm.o kv_smc.o kv_dpm.o ci_smc.o \ ci_dpm.o dce6_afmt.o radeon_vm.o radeon_ucode.o radeon_ib.o radeon_mn.o \ - radeon_sync.o + radeon_sync.o radeon_seq.o
# add async DMA block radeon-y += \ diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 3968f91..b9fde1d 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -433,6 +433,15 @@ static inline bool radeon_fence_is_earlier(struct radeon_fence *a, }
/* + * Userspace command submission identifier generation + */ +struct radeon_seq; + +uint64_t radeon_seq_push(struct radeon_seq **seq, struct radeon_fence *fence); +struct radeon_fence *radeon_seq_query(struct radeon_seq *seq, uint64_t id); +void radeon_seq_destroy(struct radeon_seq **seq); + +/* * Tiling registers */ struct radeon_surface_reg { @@ -975,7 +984,9 @@ struct radeon_vm_manager { * file private structure */ struct radeon_fpriv { - struct radeon_vm vm; + struct radeon_vm vm; + struct mutex seq_lock; + struct radeon_seq *seq; };
/* diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index 3c3b7d9..c0fc8d8 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -398,6 +398,19 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error, bo unsigned i;
if (!error) { + if (parser->chunk_flags && + parser->chunk_flags->length_dw > 4) { + struct radeon_fpriv *fpriv = parser->filp->driver_priv; + uint32_t __user *to = parser->chunk_flags->user_ptr; + uint64_t id; + + mutex_lock(&fpriv->seq_lock); + id = radeon_seq_push(&fpriv->seq, parser->ib.fence); + mutex_unlock(&fpriv->seq_lock); + + copy_to_user(&to[3], &id, sizeof(uint64_t)); + } + /* Sort the buffer list from the smallest to largest buffer, * which affects the order of buffers in the LRU list. * This assures that the smallest buffers are added first diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index f4dd26a..db5c986 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -585,39 +585,34 @@ void radeon_driver_lastclose_kms(struct drm_device *dev) */ int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) { + struct radeon_fpriv *fpriv = kzalloc(sizeof(*fpriv), GFP_KERNEL); struct radeon_device *rdev = dev->dev_private; int r;
- file_priv->driver_priv = NULL; + if (unlikely(!fpriv)) + return -ENOMEM; + + file_priv->driver_priv = fpriv;
r = pm_runtime_get_sync(dev->dev); if (r < 0) - return r; + goto error;
/* new gpu have virtual address space support */ if (rdev->family >= CHIP_CAYMAN) { - struct radeon_fpriv *fpriv; struct radeon_vm *vm; int r;
- fpriv = kzalloc(sizeof(*fpriv), GFP_KERNEL); - if (unlikely(!fpriv)) { - return -ENOMEM; - } - vm = &fpriv->vm; r = radeon_vm_init(rdev, vm); - if (r) { - kfree(fpriv); - return r; - } + if (r) + goto error;
if (rdev->accel_working) { r = radeon_bo_reserve(rdev->ring_tmp_bo.bo, false); if (r) { radeon_vm_fini(rdev, vm); - kfree(fpriv); - return r; + goto error; }
/* map the ib pool buffer read only into @@ -630,16 +625,20 @@ int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) RADEON_VM_PAGE_SNOOPED); if (r) { radeon_vm_fini(rdev, vm); - kfree(fpriv); - return r; + goto error; } } - file_priv->driver_priv = fpriv; }
+ mutex_init(&fpriv->seq_lock); + pm_runtime_mark_last_busy(dev->dev); pm_runtime_put_autosuspend(dev->dev); return 0; + +error: + kfree(fpriv); + return r; }
/** @@ -653,11 +652,13 @@ int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) void radeon_driver_postclose_kms(struct drm_device *dev, struct drm_file *file_priv) { + struct radeon_fpriv *fpriv = file_priv->driver_priv; struct radeon_device *rdev = dev->dev_private;
+ radeon_seq_destroy(&fpriv->seq); + /* new gpu have virtual address space support */ if (rdev->family >= CHIP_CAYMAN && file_priv->driver_priv) { - struct radeon_fpriv *fpriv = file_priv->driver_priv; struct radeon_vm *vm = &fpriv->vm; int r;
@@ -671,9 +672,9 @@ void radeon_driver_postclose_kms(struct drm_device *dev, }
radeon_vm_fini(rdev, vm); - kfree(fpriv); - file_priv->driver_priv = NULL; } + kfree(fpriv); + file_priv->driver_priv = NULL; }
/** diff --git a/drivers/gpu/drm/radeon/radeon_seq.c b/drivers/gpu/drm/radeon/radeon_seq.c new file mode 100644 index 0000000..d8857f1 --- /dev/null +++ b/drivers/gpu/drm/radeon/radeon_seq.c @@ -0,0 +1,229 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + */ +/* + * Authors: + * Christian König christian.koenig@amd.com + */ + +#include <drm/drmP.h> +#include "radeon.h" + +/* + * ID sequences + * This code generates a 64bit identifier for a command submission. + * It works by adding the fence of the command submission to a automatically + * resizing ring buffer. 
+ */ + +struct radeon_seq { + uint64_t start; + uint64_t end; + uint64_t mask; + struct radeon_seq *replacement; +}; + +/** + * radeon_seq_create - create a new sequence object + * + * @start: start value for this sequence + * @size: size of the ring buffer, must be power of two + * + * Allocate and initialize a new ring buffer and header. + * Returns NULL if allocation fails, new object otherwise. + */ +static struct radeon_seq *radeon_seq_create(uint64_t start, unsigned size) +{ + unsigned bytes = sizeof(struct radeon_seq) + + size * sizeof(struct radeon_fence *); + + struct radeon_seq *seq; + + seq = kmalloc(bytes, GFP_KERNEL); + if (!seq) + return NULL; + + seq->start = start; + seq->end = start; + seq->mask = size - 1; + seq->replacement = NULL; + + return seq; +} + +/** + * radeon_seq_ring - get pointer to ring buffer + * + * @seq: sequence object + * + * Calculate the address of the ring buffer. + */ +static struct radeon_fence **radeon_seq_ring(struct radeon_seq *seq) +{ + return (struct radeon_fence **)&seq[1]; +} + +/** + * radeon_seq_try_free - try to free fences from the ring buffer + * + * @seq: sequence object + * + * Try to free fences from the start of the ring buffer. + */ +static void radeon_seq_try_free(struct radeon_seq *seq) +{ + struct radeon_fence **ring = radeon_seq_ring(seq); + + while (seq->start != seq->end) { + unsigned idx = seq->start & seq->mask; + struct radeon_fence *fence = ring[idx]; + + if (!radeon_fence_signaled(fence)) + break; + + radeon_fence_unref(&fence); + ++seq->start; + } +} + +/** + * radeon_seq_add - add new fence to the end of the ring buffer + * + * @seq: sequence object + * @f: the fence object + * + * Add the fence and return the generated ID. 
+ */ +static uint64_t radeon_seq_add(struct radeon_seq *seq, struct radeon_fence *f) +{ + struct radeon_fence **ring = radeon_seq_ring(seq); + + ring[seq->end & seq->mask] = radeon_fence_ref(f); + return seq->end++; +} + +/** + * radeon_seq_push - check for room and add the fence + * + * @seq: sequence object + * @fence: the fence object + * + * Check for room on the ring buffer, if there isn't enough + * reallocate the sequence object and add the fence. + * Returns the generated ID. + */ +uint64_t radeon_seq_push(struct radeon_seq **seq, struct radeon_fence *fence) +{ + unsigned size_for_new_seq = 4; + uint64_t start_for_new_seq = 1; + + if (*seq) { + /* try to release old replacements */ + while ((*seq)->replacement) { + radeon_seq_try_free(*seq); + if ((*seq)->start == (*seq)->end) { + struct radeon_seq *repl = (*seq)->replacement; + + kfree(*seq); + *seq = repl; + } else { + /* move on to the current container */ + seq = &(*seq)->replacement; + } + } + + /* check if we have enough room for one more fence */ + radeon_seq_try_free(*seq); + if (((*seq)->end - (*seq)->start) <= (*seq)->mask) + return radeon_seq_add(*seq, fence); + + /* not enough room, let's allocate a replacement */ + size_for_new_seq = ((*seq)->mask + 1) * 2; + start_for_new_seq = (*seq)->end + 1; + seq = &(*seq)->replacement; + } + + *seq = radeon_seq_create(start_for_new_seq, size_for_new_seq); + if (!*seq) { + /* not enough memory for a new sequence object, but failing + here isn't a good idea either cause the commands are already + submitted to the hardware. So just block on the fence. */ + int r = radeon_fence_wait(fence, false); + if (r) + DRM_ERROR("Error waiting for fence (%d)\n", r); + return 0; + } + return radeon_seq_add(*seq, fence); +} + +/** + * radeon_seq_query - lockup fence by it's ID + * + * @seq: sequence object + * @id: the generated ID + * + * Lockup the associated fence by it's ID. + * Returns fence object or NULL if it couldn't be found. 
+ */ +struct radeon_fence *radeon_seq_query(struct radeon_seq *seq, uint64_t id) +{ + struct radeon_fence **ring; + + while (seq && id > seq->end) + seq = seq->replacement; + + if (!seq || id < seq->start) + return NULL; + + ring = radeon_seq_ring(seq); + return ring[id & seq->mask]; +} + +/** + * radeon_seq_destroy - destroy the sequence object + * + * @seq_ptr: pointer to sequence object + * + * Destroy the sequence objects and release all fence references taken. + */ +void radeon_seq_destroy(struct radeon_seq **seq_ptr) +{ + struct radeon_seq *seq = *seq_ptr; + while (seq) { + struct radeon_seq *repl = seq->replacement; + unsigned start = seq->start & seq->mask; + unsigned end = seq->end & seq->mask; + struct radeon_fence **ring; + unsigned i; + + ring = radeon_seq_ring(seq); + for (i = start; i < end; ++i) + radeon_fence_unref(&ring[i]); + + kfree(seq); + seq = repl; + } + *seq_ptr = NULL; +} diff --git a/include/uapi/drm/radeon_drm.h b/include/uapi/drm/radeon_drm.h index 50d0fb4..6b2b2e7 100644 --- a/include/uapi/drm/radeon_drm.h +++ b/include/uapi/drm/radeon_drm.h @@ -959,6 +959,7 @@ struct drm_radeon_gem_va { #define RADEON_CS_RING_VCE 4 /* The third dword of RADEON_CHUNK_ID_FLAGS is a sint32 that sets the priority */ /* 0 = normal, + = higher priority, - = lower priority */ +/* The fourth and fives dword are a 64bit fence ID generated for this CS */
struct drm_radeon_cs_chunk { uint32_t chunk_id;
On 09.12.2014 01:11, Christian König wrote:
From: Christian König christian.koenig@amd.com
This patch adds a new 64bit ID as a result for each command submission.
Signed-off-by: Christian König christian.koenig@amd.com
I noticed a few spelling mistakes, see below.
Other than these minor nits, I haven't noticed any problems in this series, though I haven't looked at it in too much detail.
+/*
- ID sequences
- This code generates a 64bit identifier for a command submission.
- It works by adding the fence of the command submission to a automatically
'to an automatically'
+/**
- radeon_seq_query - lockup fence by it's ID
'look up fence by its ID'
- @seq: sequence object
- @id: the generated ID
- Lockup the associated fence by it's ID.
'Look up the associated fence by its ID.'
+/* The fourth and fives dword are a 64bit fence ID generated for this CS */
'fourth and fifth'
From: Christian König christian.koenig@amd.com
The driver falls back to explicit synchronization as soon as buffers move between clients or are moved by TTM.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/radeon/radeon.h | 1 + drivers/gpu/drm/radeon/radeon_cs.c | 24 +++++++++++++++++++++++- include/uapi/drm/radeon_drm.h | 7 ++++--- 3 files changed, 28 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index b9fde1d..1529afb 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -1101,6 +1101,7 @@ struct radeon_cs_parser { struct radeon_cs_chunk *chunk_relocs; struct radeon_cs_chunk *chunk_flags; struct radeon_cs_chunk *chunk_const_ib; + struct radeon_cs_chunk *chunk_wait_for; struct radeon_ib ib; struct radeon_ib const_ib; void *track; diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index c0fc8d8..a73f9da 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -165,7 +165,8 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p) }
p->relocs[i].tv.bo = &p->relocs[i].robj->tbo; - p->relocs[i].tv.shared = !r->write_domain; + p->relocs[i].tv.shared = !r->write_domain || + !!p->chunk_wait_for;
radeon_cs_buckets_add(&buckets, &p->relocs[i].tv.head, priority); @@ -235,6 +236,23 @@ static int radeon_cs_sync_rings(struct radeon_cs_parser *p) struct radeon_bo_list *reloc; int r;
+ if (p->chunk_wait_for) { + struct radeon_fpriv *fpriv = p->filp->driver_priv; + unsigned i; + + for (i = 0; i < p->chunk_wait_for->length_dw; i += 2) { + struct radeon_fence *fence; + uint64_t *id; + + id = (uint64_t *)&p->chunk_wait_for->kdata[i]; + + mutex_lock(&fpriv->seq_lock); + fence = radeon_seq_query(fpriv->seq, *id); + radeon_sync_fence(&p->ib.sync, fence); + mutex_unlock(&fpriv->seq_lock); + } + } + list_for_each_entry(reloc, &p->validated, tv.head) { struct reservation_object *resv; long owner = reloc->tv.shared ? (long)p->filp : @@ -317,6 +335,10 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data) if (p->chunks[i].length_dw == 0) return -EINVAL; } + if (user_chunk.chunk_id == RADEON_CHUNK_ID_WAIT_FOR) { + p->chunk_wait_for = &p->chunks[i]; + /* zero length wait for list is actually useful */ + }
size = p->chunks[i].length_dw; cdata = (void __user *)(unsigned long)user_chunk.chunk_data; diff --git a/include/uapi/drm/radeon_drm.h b/include/uapi/drm/radeon_drm.h index 6b2b2e7..a34e3db 100644 --- a/include/uapi/drm/radeon_drm.h +++ b/include/uapi/drm/radeon_drm.h @@ -942,10 +942,11 @@ struct drm_radeon_gem_va { uint64_t offset; };
-#define RADEON_CHUNK_ID_RELOCS 0x01 -#define RADEON_CHUNK_ID_IB 0x02 -#define RADEON_CHUNK_ID_FLAGS 0x03 +#define RADEON_CHUNK_ID_RELOCS 0x01 +#define RADEON_CHUNK_ID_IB 0x02 +#define RADEON_CHUNK_ID_FLAGS 0x03 #define RADEON_CHUNK_ID_CONST_IB 0x04 +#define RADEON_CHUNK_ID_WAIT_FOR 0x05
/* The first dword of RADEON_CHUNK_ID_FLAGS is a uint32 of these flags: */ #define RADEON_CS_KEEP_TILING_FLAGS 0x01
On 09.12.2014 01:11, Christian König wrote:
From: Christian König christian.koenig@amd.com
The driver falls back to explicit synchronization as soon as buffers move between clients or are moved by TTM.
I assume this should say 'falls back to implicit synchronization'.
From: Christian König christian.koenig@amd.com
PT updates can be seen as command submissions as well, and we don't necessarily need to wait on all of them.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/radeon/radeon_gem.c | 12 +++++++++++- include/uapi/drm/radeon_drm.h | 1 + 2 files changed, 12 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index fe48f22..dd45611 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -691,8 +691,18 @@ int radeon_gem_va_ioctl(struct drm_device *dev, void *data, default: break; } - if (!r) + args->id = 0; + if (!r) { + struct radeon_fence *fence; + radeon_gem_va_update_vm(rdev, bo_va); + fence = bo_va->last_pt_update; + if (fence) { + mutex_lock(&fpriv->seq_lock); + args->id = radeon_seq_push(&fpriv->seq, fence); + mutex_unlock(&fpriv->seq_lock); + } + } args->operation = RADEON_VA_RESULT_OK; if (r) { args->operation = RADEON_VA_RESULT_ERROR; diff --git a/include/uapi/drm/radeon_drm.h b/include/uapi/drm/radeon_drm.h index a34e3db..2c50838 100644 --- a/include/uapi/drm/radeon_drm.h +++ b/include/uapi/drm/radeon_drm.h @@ -940,6 +940,7 @@ struct drm_radeon_gem_va { uint32_t vm_id; uint32_t flags; uint64_t offset; + uint64_t id; };
#define RADEON_CHUNK_ID_RELOCS 0x01
From: Christian König christian.koenig@amd.com
At least inside the same client we should stop waiting for a buffer to be idle, but rather wait for a specific command submission to complete.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/radeon/radeon.h | 2 ++ drivers/gpu/drm/radeon/radeon_gem.c | 26 ++++++++++++++++++++++++++ drivers/gpu/drm/radeon/radeon_kms.c | 1 + include/uapi/drm/radeon_drm.h | 7 +++++++ 4 files changed, 36 insertions(+)
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 1529afb..d8bf3a7 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -2230,6 +2230,8 @@ int radeon_gem_busy_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); int radeon_gem_wait_idle_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); +int radeon_gem_wait_cs_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp); int radeon_gem_va_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); int radeon_gem_op_ioctl(struct drm_device *dev, void *data, diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index dd45611..297f327 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -494,6 +494,32 @@ int radeon_gem_wait_idle_ioctl(struct drm_device *dev, void *data, return r; }
+int radeon_gem_wait_cs_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp) +{ + struct radeon_fpriv *fpriv = filp->driver_priv; + struct drm_radeon_gem_wait_cs *args = data; + struct radeon_fence *fence; + unsigned long timeout; + long r; + + mutex_lock(&fpriv->seq_lock); + fence = radeon_fence_ref(radeon_seq_query(fpriv->seq, args->id)); + mutex_unlock(&fpriv->seq_lock); + + timeout = nsecs_to_jiffies(args->timeout); + r = fence_wait_timeout(&fence->base, true, timeout); + radeon_fence_unref(&fence); + + if (r == 0) + return -EBUSY; + + if (r < 0) + return r; + + return 0; +} + int radeon_gem_set_tiling_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index db5c986..69b74a8 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -892,5 +892,6 @@ const struct drm_ioctl_desc radeon_ioctls_kms[] = { DRM_IOCTL_DEF_DRV(RADEON_GEM_VA, radeon_gem_va_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(RADEON_GEM_OP, radeon_gem_op_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(RADEON_GEM_USERPTR, radeon_gem_userptr_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(RADEON_GEM_WAIT_CS, radeon_gem_wait_cs_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), }; int radeon_max_kms_ioctl = ARRAY_SIZE(radeon_ioctls_kms); diff --git a/include/uapi/drm/radeon_drm.h b/include/uapi/drm/radeon_drm.h index 2c50838..d700c06 100644 --- a/include/uapi/drm/radeon_drm.h +++ b/include/uapi/drm/radeon_drm.h @@ -512,6 +512,7 @@ typedef struct { #define DRM_RADEON_GEM_VA 0x2b #define DRM_RADEON_GEM_OP 0x2c #define DRM_RADEON_GEM_USERPTR 0x2d +#define DRM_RADEON_GEM_WAIT_CS 0x2e
#define DRM_IOCTL_RADEON_CP_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_CP_INIT, drm_radeon_init_t) #define DRM_IOCTL_RADEON_CP_START DRM_IO( DRM_COMMAND_BASE + DRM_RADEON_CP_START) @@ -556,6 +557,7 @@ typedef struct { #define DRM_IOCTL_RADEON_GEM_VA DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_VA, struct drm_radeon_gem_va) #define DRM_IOCTL_RADEON_GEM_OP DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_OP, struct drm_radeon_gem_op) #define DRM_IOCTL_RADEON_GEM_USERPTR DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_USERPTR, struct drm_radeon_gem_userptr) +#define DRM_IOCTL_RADEON_GEM_WAIT_CS DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_WAIT_CS, struct drm_radeon_gem_wait_cs)
typedef struct drm_radeon_init { enum { @@ -880,6 +882,11 @@ struct drm_radeon_gem_wait_idle { uint32_t pad; };
+struct drm_radeon_gem_wait_cs { + uint64_t id; + uint64_t timeout; +}; + struct drm_radeon_gem_busy { uint32_t handle; uint32_t domain;
dri-devel@lists.freedesktop.org