It is completely unnecessary to create fences before they are emitted, so remove it and a bunch of checks if fences are emitted or not.
Signed-off-by: Christian König deathsimple@vodafone.de --- drivers/gpu/drm/radeon/evergreen.c | 2 +- drivers/gpu/drm/radeon/ni.c | 2 +- drivers/gpu/drm/radeon/r100.c | 4 +- drivers/gpu/drm/radeon/r200.c | 4 +- drivers/gpu/drm/radeon/r600.c | 4 +- drivers/gpu/drm/radeon/r600_blit_kms.c | 6 +-- drivers/gpu/drm/radeon/radeon.h | 11 +++-- drivers/gpu/drm/radeon/radeon_asic.h | 8 ++-- drivers/gpu/drm/radeon/radeon_benchmark.c | 10 +---- drivers/gpu/drm/radeon/radeon_fence.c | 42 ++++++------------ drivers/gpu/drm/radeon/radeon_ring.c | 19 +++++---- drivers/gpu/drm/radeon/radeon_sa.c | 2 +- drivers/gpu/drm/radeon/radeon_test.c | 66 ++++++++++++----------------- drivers/gpu/drm/radeon/radeon_ttm.c | 30 +++++-------- drivers/gpu/drm/radeon/si.c | 6 +-- 15 files changed, 86 insertions(+), 130 deletions(-)
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index 58991af..dd3cea4 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -1366,7 +1366,7 @@ void evergreen_mc_program(struct radeon_device *rdev) */ void evergreen_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) { - struct radeon_ring *ring = &rdev->ring[ib->fence->ring]; + struct radeon_ring *ring = &rdev->ring[ib->ring];
/* set to DX10/11 mode */ radeon_ring_write(ring, PACKET3(PACKET3_MODE_CONTROL, 0)); diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index b01c2dd..9d9f5ac 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -1127,7 +1127,7 @@ void cayman_fence_ring_emit(struct radeon_device *rdev,
void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) { - struct radeon_ring *ring = &rdev->ring[ib->fence->ring]; + struct radeon_ring *ring = &rdev->ring[ib->ring];
/* set to DX10/11 mode */ radeon_ring_write(ring, PACKET3(PACKET3_MODE_CONTROL, 0)); diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index fb44e7e..415b7d8 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -883,7 +883,7 @@ int r100_copy_blit(struct radeon_device *rdev, uint64_t src_offset, uint64_t dst_offset, unsigned num_gpu_pages, - struct radeon_fence *fence) + struct radeon_fence **fence) { struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; uint32_t cur_pages; @@ -947,7 +947,7 @@ int r100_copy_blit(struct radeon_device *rdev, RADEON_WAIT_HOST_IDLECLEAN | RADEON_WAIT_DMA_GUI_IDLE); if (fence) { - r = radeon_fence_emit(rdev, fence); + r = radeon_fence_emit(rdev, fence, RADEON_RING_TYPE_GFX_INDEX); } radeon_ring_unlock_commit(rdev, ring); return r; diff --git a/drivers/gpu/drm/radeon/r200.c b/drivers/gpu/drm/radeon/r200.c index a26144d..f088925 100644 --- a/drivers/gpu/drm/radeon/r200.c +++ b/drivers/gpu/drm/radeon/r200.c @@ -85,7 +85,7 @@ int r200_copy_dma(struct radeon_device *rdev, uint64_t src_offset, uint64_t dst_offset, unsigned num_gpu_pages, - struct radeon_fence *fence) + struct radeon_fence **fence) { struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; uint32_t size; @@ -120,7 +120,7 @@ int r200_copy_dma(struct radeon_device *rdev, radeon_ring_write(ring, PACKET0(RADEON_WAIT_UNTIL, 0)); radeon_ring_write(ring, RADEON_WAIT_DMA_GUI_IDLE); if (fence) { - r = radeon_fence_emit(rdev, fence); + r = radeon_fence_emit(rdev, fence, RADEON_RING_TYPE_GFX_INDEX); } radeon_ring_unlock_commit(rdev, ring); return r; diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index f388a1d..e5279f9 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -2369,7 +2369,7 @@ int r600_copy_blit(struct radeon_device *rdev, uint64_t src_offset, uint64_t dst_offset, unsigned num_gpu_pages, - struct radeon_fence *fence) + struct radeon_fence **fence) { struct radeon_sa_bo *vb = NULL; int r; @@ -2670,7 +2670,7 @@ void r600_fini(struct radeon_device *rdev) */ void r600_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) { - struct radeon_ring *ring = &rdev->ring[ib->fence->ring]; + struct radeon_ring *ring = &rdev->ring[ib->ring];
/* FIXME: implement */ radeon_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); diff --git a/drivers/gpu/drm/radeon/r600_blit_kms.c b/drivers/gpu/drm/radeon/r600_blit_kms.c index 03b6e0d..02f4eeb 100644 --- a/drivers/gpu/drm/radeon/r600_blit_kms.c +++ b/drivers/gpu/drm/radeon/r600_blit_kms.c @@ -703,20 +703,20 @@ int r600_blit_prepare_copy(struct radeon_device *rdev, unsigned num_gpu_pages, return 0; }
-void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence *fence, +void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence **fence, struct radeon_sa_bo *vb) { struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; int r;
- r = radeon_fence_emit(rdev, fence); + r = radeon_fence_emit(rdev, fence, RADEON_RING_TYPE_GFX_INDEX); if (r) { radeon_ring_unlock_undo(rdev, ring); return; }
radeon_ring_unlock_commit(rdev, ring); - radeon_sa_bo_free(rdev, &vb, fence); + radeon_sa_bo_free(rdev, &vb, *fence); }
void r600_kms_blit_copy(struct radeon_device *rdev, diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 1dc3a4a..5e259b4 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -113,7 +113,6 @@ extern int radeon_lockup_timeout;
/* fence seq are set to this number when signaled */ #define RADEON_FENCE_SIGNALED_SEQ 0LL -#define RADEON_FENCE_NOTEMITED_SEQ (~0LL)
/* internal ring indices */ /* r1xx+ has gfx CP ring */ @@ -277,8 +276,7 @@ struct radeon_fence { int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring); int radeon_fence_driver_init(struct radeon_device *rdev); void radeon_fence_driver_fini(struct radeon_device *rdev); -int radeon_fence_create(struct radeon_device *rdev, struct radeon_fence **fence, int ring); -int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence *fence); +int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence **fence, int ring); void radeon_fence_process(struct radeon_device *rdev, int ring); bool radeon_fence_signaled(struct radeon_fence *fence); int radeon_fence_wait(struct radeon_fence *fence, bool interruptible); @@ -627,6 +625,7 @@ struct radeon_ib { uint32_t length_dw; uint64_t gpu_addr; uint32_t *ptr; + int ring; struct radeon_fence *fence; unsigned vm_id; bool is_const_ib; @@ -1190,20 +1189,20 @@ struct radeon_asic { uint64_t src_offset, uint64_t dst_offset, unsigned num_gpu_pages, - struct radeon_fence *fence); + struct radeon_fence **fence); u32 blit_ring_index; int (*dma)(struct radeon_device *rdev, uint64_t src_offset, uint64_t dst_offset, unsigned num_gpu_pages, - struct radeon_fence *fence); + struct radeon_fence **fence); u32 dma_ring_index; /* method used for bo copy */ int (*copy)(struct radeon_device *rdev, uint64_t src_offset, uint64_t dst_offset, unsigned num_gpu_pages, - struct radeon_fence *fence); + struct radeon_fence **fence); /* ring used for bo copies */ u32 copy_ring_index; } copy; diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index e76a941..8cdf075 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -79,7 +79,7 @@ int r100_copy_blit(struct radeon_device *rdev, uint64_t src_offset, uint64_t dst_offset, unsigned num_gpu_pages, - struct radeon_fence *fence); + struct radeon_fence **fence); int r100_set_surface_reg(struct radeon_device *rdev, int reg, uint32_t tiling_flags, uint32_t pitch, uint32_t offset, uint32_t obj_size); @@ -144,7 +144,7 @@ extern int r200_copy_dma(struct radeon_device *rdev, uint64_t src_offset, uint64_t dst_offset, unsigned num_gpu_pages, - struct radeon_fence *fence); + struct radeon_fence **fence); void r200_set_safe_registers(struct radeon_device *rdev);
/* @@ -318,7 +318,7 @@ void r600_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib); int r600_ring_test(struct radeon_device *rdev, struct radeon_ring *cp); int r600_copy_blit(struct radeon_device *rdev, uint64_t src_offset, uint64_t dst_offset, - unsigned num_gpu_pages, struct radeon_fence *fence); + unsigned num_gpu_pages, struct radeon_fence **fence); void r600_hpd_init(struct radeon_device *rdev); void r600_hpd_fini(struct radeon_device *rdev); bool r600_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd); @@ -364,7 +364,7 @@ void r600_hdmi_update_audio_settings(struct drm_encoder *encoder); /* r600 blit */ int r600_blit_prepare_copy(struct radeon_device *rdev, unsigned num_gpu_pages, struct radeon_sa_bo **vb); -void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence *fence, +void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence **fence, struct radeon_sa_bo *vb); void r600_kms_blit_copy(struct radeon_device *rdev, u64 src_gpu_addr, u64 dst_gpu_addr, diff --git a/drivers/gpu/drm/radeon/radeon_benchmark.c b/drivers/gpu/drm/radeon/radeon_benchmark.c index 364f5b1..bedda9c 100644 --- a/drivers/gpu/drm/radeon/radeon_benchmark.c +++ b/drivers/gpu/drm/radeon/radeon_benchmark.c @@ -45,20 +45,14 @@ static int radeon_benchmark_do_move(struct radeon_device *rdev, unsigned size, for (i = 0; i < n; i++) { switch (flag) { case RADEON_BENCHMARK_COPY_DMA: - r = radeon_fence_create(rdev, &fence, radeon_copy_dma_ring_index(rdev)); - if (r) - return r; r = radeon_copy_dma(rdev, saddr, daddr, size / RADEON_GPU_PAGE_SIZE, - fence); + &fence); break; case RADEON_BENCHMARK_COPY_BLIT: - r = radeon_fence_create(rdev, &fence, radeon_copy_blit_ring_index(rdev)); - if (r) - return r; r = radeon_copy_blit(rdev, saddr, daddr, size / RADEON_GPU_PAGE_SIZE, - fence); + &fence); break; default: DRM_ERROR("Unknown copy method\n"); diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c index 11f5f40..401d346 100644 --- a/drivers/gpu/drm/radeon/radeon_fence.c +++ b/drivers/gpu/drm/radeon/radeon_fence.c @@ -61,15 +61,21 @@ static u32 radeon_fence_read(struct radeon_device *rdev, int ring) return seq; }
-int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence *fence) +int radeon_fence_emit(struct radeon_device *rdev, + struct radeon_fence **fence, + int ring) { /* we are protected by the ring emission mutex */ - if (fence->seq && fence->seq < RADEON_FENCE_NOTEMITED_SEQ) { - return 0; + *fence = kmalloc(sizeof(struct radeon_fence), GFP_KERNEL); + if ((*fence) == NULL) { + return -ENOMEM; } - fence->seq = ++rdev->fence_drv[fence->ring].seq; - radeon_fence_ring_emit(rdev, fence->ring, fence); - trace_radeon_fence_emit(rdev->ddev, fence->seq); + kref_init(&((*fence)->kref)); + (*fence)->rdev = rdev; + (*fence)->seq = ++rdev->fence_drv[ring].seq; + (*fence)->ring = ring; + radeon_fence_ring_emit(rdev, ring, *fence); + trace_radeon_fence_emit(rdev->ddev, (*fence)->seq); return 0; }
@@ -138,25 +144,9 @@ static void radeon_fence_destroy(struct kref *kref) struct radeon_fence *fence;
fence = container_of(kref, struct radeon_fence, kref); - fence->seq = RADEON_FENCE_NOTEMITED_SEQ; kfree(fence); }
-int radeon_fence_create(struct radeon_device *rdev, - struct radeon_fence **fence, - int ring) -{ - *fence = kmalloc(sizeof(struct radeon_fence), GFP_KERNEL); - if ((*fence) == NULL) { - return -ENOMEM; - } - kref_init(&((*fence)->kref)); - (*fence)->rdev = rdev; - (*fence)->seq = RADEON_FENCE_NOTEMITED_SEQ; - (*fence)->ring = ring; - return 0; -} - static bool radeon_fence_seq_signaled(struct radeon_device *rdev, u64 seq, unsigned ring) { @@ -176,10 +166,6 @@ bool radeon_fence_signaled(struct radeon_fence *fence) if (!fence) { return true; } - if (fence->seq == RADEON_FENCE_NOTEMITED_SEQ) { - WARN(1, "Querying an unemitted fence : %p !\n", fence); - return true; - } if (fence->seq == RADEON_FENCE_SIGNALED_SEQ) { return true; } @@ -444,9 +430,7 @@ int radeon_fence_wait_any(struct radeon_device *rdev, return 0; }
- if (fences[i]->seq < RADEON_FENCE_NOTEMITED_SEQ) { - seq[i] = fences[i]->seq; - } + seq[i] = fences[i]->seq; }
r = radeon_fence_wait_any_seq(rdev, seq, intr); diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index 493a7be..a0b9970 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -74,13 +74,9 @@ int radeon_ib_get(struct radeon_device *rdev, int ring, dev_err(rdev->dev, "failed to get a new IB (%d)\n", r); return r; } - r = radeon_fence_create(rdev, &ib->fence, ring); - if (r) { - dev_err(rdev->dev, "failed to create fence for new IB (%d)\n", r); - radeon_sa_bo_free(rdev, &ib->sa_bo, NULL); - return r; - }
+ ib->ring = ring; + ib->fence = NULL; ib->ptr = radeon_sa_bo_cpu_addr(ib->sa_bo); ib->gpu_addr = radeon_sa_bo_gpu_addr(ib->sa_bo); ib->vm_id = 0; @@ -99,7 +95,7 @@ void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib *ib)
int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib) { - struct radeon_ring *ring = &rdev->ring[ib->fence->ring]; + struct radeon_ring *ring = &rdev->ring[ib->ring]; int r = 0;
if (!ib->length_dw || !ring->ready) { @@ -114,8 +110,13 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib) dev_err(rdev->dev, "scheduling IB failed (%d).\n", r); return r; } - radeon_ring_ib_execute(rdev, ib->fence->ring, ib); - radeon_fence_emit(rdev, ib->fence); + radeon_ring_ib_execute(rdev, ib->ring, ib); + r = radeon_fence_emit(rdev, &ib->fence, ib->ring); + if (r) { + dev_err(rdev->dev, "failed to emit fence for new IB (%d)\n", r); + radeon_ring_unlock_undo(rdev, ring); + return r; + } radeon_ring_unlock_commit(rdev, ring); return 0; } diff --git a/drivers/gpu/drm/radeon/radeon_sa.c b/drivers/gpu/drm/radeon/radeon_sa.c index 32059b7..81dbb5b 100644 --- a/drivers/gpu/drm/radeon/radeon_sa.c +++ b/drivers/gpu/drm/radeon/radeon_sa.c @@ -349,7 +349,7 @@ void radeon_sa_bo_free(struct radeon_device *rdev, struct radeon_sa_bo **sa_bo,
sa_manager = (*sa_bo)->manager; spin_lock(&sa_manager->lock); - if (fence && fence->seq && fence->seq < RADEON_FENCE_NOTEMITED_SEQ) { + if (fence && !radeon_fence_signaled(fence)) { (*sa_bo)->fence = radeon_fence_ref(fence); list_add_tail(&(*sa_bo)->flist, &sa_manager->flist[fence->ring]); diff --git a/drivers/gpu/drm/radeon/radeon_test.c b/drivers/gpu/drm/radeon/radeon_test.c index efff929..47e1535 100644 --- a/drivers/gpu/drm/radeon/radeon_test.c +++ b/drivers/gpu/drm/radeon/radeon_test.c @@ -106,13 +106,7 @@ void radeon_test_moves(struct radeon_device *rdev)
radeon_bo_kunmap(gtt_obj[i]);
- r = radeon_fence_create(rdev, &fence, RADEON_RING_TYPE_GFX_INDEX); - if (r) { - DRM_ERROR("Failed to create GTT->VRAM fence %d\n", i); - goto out_cleanup; - } - - r = radeon_copy(rdev, gtt_addr, vram_addr, size / RADEON_GPU_PAGE_SIZE, fence); + r = radeon_copy(rdev, gtt_addr, vram_addr, size / RADEON_GPU_PAGE_SIZE, &fence); if (r) { DRM_ERROR("Failed GTT->VRAM copy %d\n", i); goto out_cleanup; @@ -155,13 +149,7 @@ void radeon_test_moves(struct radeon_device *rdev)
radeon_bo_kunmap(vram_obj);
- r = radeon_fence_create(rdev, &fence, RADEON_RING_TYPE_GFX_INDEX); - if (r) { - DRM_ERROR("Failed to create VRAM->GTT fence %d\n", i); - goto out_cleanup; - } - - r = radeon_copy(rdev, vram_addr, gtt_addr, size / RADEON_GPU_PAGE_SIZE, fence); + r = radeon_copy(rdev, vram_addr, gtt_addr, size / RADEON_GPU_PAGE_SIZE, &fence); if (r) { DRM_ERROR("Failed VRAM->GTT copy %d\n", i); goto out_cleanup; @@ -245,17 +233,6 @@ void radeon_test_ring_sync(struct radeon_device *rdev, int ridxB = radeon_ring_index(rdev, ringB); int r;
- r = radeon_fence_create(rdev, &fence1, ridxA); - if (r) { - DRM_ERROR("Failed to create sync fence 1\n"); - goto out_cleanup; - } - r = radeon_fence_create(rdev, &fence2, ridxA); - if (r) { - DRM_ERROR("Failed to create sync fence 2\n"); - goto out_cleanup; - } - r = radeon_semaphore_create(rdev, &semaphore); if (r) { DRM_ERROR("Failed to create semaphore\n"); @@ -268,9 +245,19 @@ void radeon_test_ring_sync(struct radeon_device *rdev, goto out_cleanup; } radeon_semaphore_emit_wait(rdev, ridxA, semaphore); - radeon_fence_emit(rdev, fence1); + r = radeon_fence_emit(rdev, &fence1, ridxA); + if (r) { + DRM_ERROR("Failed to emit fence 1\n"); + radeon_ring_unlock_undo(rdev, ringA); + goto out_cleanup; + } radeon_semaphore_emit_wait(rdev, ridxA, semaphore); - radeon_fence_emit(rdev, fence2); + r = radeon_fence_emit(rdev, &fence2, ridxA); + if (r) { + DRM_ERROR("Failed to emit fence 2\n"); + radeon_ring_unlock_undo(rdev, ringA); + goto out_cleanup; + } radeon_ring_unlock_commit(rdev, ringA);
mdelay(1000); @@ -342,17 +329,6 @@ void radeon_test_ring_sync2(struct radeon_device *rdev, bool sigA, sigB; int i, r;
- r = radeon_fence_create(rdev, &fenceA, ridxA); - if (r) { - DRM_ERROR("Failed to create sync fence 1\n"); - goto out_cleanup; - } - r = radeon_fence_create(rdev, &fenceB, ridxB); - if (r) { - DRM_ERROR("Failed to create sync fence 2\n"); - goto out_cleanup; - } - r = radeon_semaphore_create(rdev, &semaphore); if (r) { DRM_ERROR("Failed to create semaphore\n"); @@ -365,7 +341,12 @@ void radeon_test_ring_sync2(struct radeon_device *rdev, goto out_cleanup; } radeon_semaphore_emit_wait(rdev, ridxA, semaphore); - radeon_fence_emit(rdev, fenceA); + r = radeon_fence_emit(rdev, &fenceA, ridxA); + if (r) { + DRM_ERROR("Failed to emit sync fence 1\n"); + radeon_ring_unlock_undo(rdev, ringA); + goto out_cleanup; + } radeon_ring_unlock_commit(rdev, ringA);
r = radeon_ring_lock(rdev, ringB, 64); @@ -374,7 +355,12 @@ void radeon_test_ring_sync2(struct radeon_device *rdev, goto out_cleanup; } radeon_semaphore_emit_wait(rdev, ridxB, semaphore); - radeon_fence_emit(rdev, fenceB); + r = radeon_fence_emit(rdev, &fenceB, ridxB); + if (r) { + DRM_ERROR("Failed to create sync fence 2\n"); + radeon_ring_unlock_undo(rdev, ringB); + goto out_cleanup; + } radeon_ring_unlock_commit(rdev, ringB);
mdelay(1000); diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index c94a225..2d36bdd 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -222,15 +222,12 @@ static int radeon_move_blit(struct ttm_buffer_object *bo, { struct radeon_device *rdev; uint64_t old_start, new_start; - struct radeon_fence *fence, *old_fence; + struct radeon_fence *fence; struct radeon_semaphore *sem = NULL; - int r; + int r, ridx;
rdev = radeon_get_rdev(bo->bdev); - r = radeon_fence_create(rdev, &fence, radeon_copy_ring_index(rdev)); - if (unlikely(r)) { - return r; - } + ridx = radeon_copy_ring_index(rdev); old_start = old_mem->start << PAGE_SHIFT; new_start = new_mem->start << PAGE_SHIFT;
@@ -243,7 +240,6 @@ static int radeon_move_blit(struct ttm_buffer_object *bo, break; default: DRM_ERROR("Unknown placement %d\n", old_mem->mem_type); - radeon_fence_unref(&fence); return -EINVAL; } switch (new_mem->mem_type) { @@ -255,42 +251,38 @@ static int radeon_move_blit(struct ttm_buffer_object *bo, break; default: DRM_ERROR("Unknown placement %d\n", old_mem->mem_type); - radeon_fence_unref(&fence); return -EINVAL; } - if (!rdev->ring[radeon_copy_ring_index(rdev)].ready) { + if (!rdev->ring[ridx].ready) { DRM_ERROR("Trying to move memory with ring turned off.\n"); - radeon_fence_unref(&fence); return -EINVAL; }
BUILD_BUG_ON((PAGE_SIZE % RADEON_GPU_PAGE_SIZE) != 0);
/* sync other rings */ - old_fence = bo->sync_obj; - if (old_fence && old_fence->ring != fence->ring - && !radeon_fence_signaled(old_fence)) { + fence = bo->sync_obj; + if (fence && fence->ring != ridx + && !radeon_fence_signaled(fence)) { bool sync_to_ring[RADEON_NUM_RINGS] = { }; - sync_to_ring[old_fence->ring] = true; + sync_to_ring[fence->ring] = true;
r = radeon_semaphore_create(rdev, &sem); if (r) { - radeon_fence_unref(&fence); return r; }
- r = radeon_semaphore_sync_rings(rdev, sem, - sync_to_ring, fence->ring); + r = radeon_semaphore_sync_rings(rdev, sem, sync_to_ring, ridx); if (r) { radeon_semaphore_free(rdev, sem, NULL); - radeon_fence_unref(&fence); return r; } }
+ fence = NULL; r = radeon_copy(rdev, old_start, new_start, new_mem->num_pages * (PAGE_SIZE / RADEON_GPU_PAGE_SIZE), /* GPU pages */ - fence); + &fence); /* FIXME: handle copy error */ r = ttm_bo_move_accel_cleanup(bo, (void *)fence, NULL, evict, no_wait_reserve, no_wait_gpu, new_mem); diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index 549732e..5ca8ef5 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -1915,7 +1915,7 @@ void si_fence_ring_emit(struct radeon_device *rdev, */ void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) { - struct radeon_ring *ring = &rdev->ring[ib->fence->ring]; + struct radeon_ring *ring = &rdev->ring[ib->ring]; u32 header;
if (ib->is_const_ib) @@ -2855,7 +2855,7 @@ int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib) if (ib->is_const_ib) ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt); else { - switch (ib->fence->ring) { + switch (ib->ring) { case RADEON_RING_TYPE_GFX_INDEX: ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt); break; @@ -2864,7 +2864,7 @@ int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib) ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt); break; default: - dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->fence->ring); + dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring); ret = -EINVAL; break; }
Signed-off-by: Christian König deathsimple@vodafone.de --- drivers/gpu/drm/radeon/radeon.h | 23 ++++++++++- drivers/gpu/drm/radeon/radeon_fence.c | 73 +++++++++++++++++++++++++++++---- 2 files changed, 85 insertions(+), 11 deletions(-)
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 5e259b4..4e232c3 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -257,8 +257,8 @@ struct radeon_fence_driver { uint32_t scratch_reg; uint64_t gpu_addr; volatile uint32_t *cpu_addr; - /* seq is protected by ring emission lock */ - uint64_t seq; + /* sync_seq is protected by ring emission lock */ + uint64_t sync_seq[RADEON_NUM_RINGS]; atomic64_t last_seq; unsigned long last_activity; bool initialized; @@ -288,6 +288,25 @@ int radeon_fence_wait_any(struct radeon_device *rdev, struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence); void radeon_fence_unref(struct radeon_fence **fence); unsigned radeon_fence_count_emitted(struct radeon_device *rdev, int ring); +bool radeon_fence_need_sync(struct radeon_fence *fence, int ring); +void radeon_fence_note_sync(struct radeon_fence *fence, int ring); +static inline struct radeon_fence *radeon_fence_later(struct radeon_fence *a, + struct radeon_fence *b) +{ + if (!a) { + return b; + } + + if (!b) { + return a; + } + + if (a->seq > b->seq) { + return a; + } else { + return b; + } +}
/* * Tiling registers diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c index 401d346..7b55625 100644 --- a/drivers/gpu/drm/radeon/radeon_fence.c +++ b/drivers/gpu/drm/radeon/radeon_fence.c @@ -72,7 +72,7 @@ int radeon_fence_emit(struct radeon_device *rdev, } kref_init(&((*fence)->kref)); (*fence)->rdev = rdev; - (*fence)->seq = ++rdev->fence_drv[ring].seq; + (*fence)->seq = ++rdev->fence_drv[ring].sync_seq[ring]; (*fence)->ring = ring; radeon_fence_ring_emit(rdev, ring, *fence); trace_radeon_fence_emit(rdev->ddev, (*fence)->seq); @@ -449,7 +449,7 @@ int radeon_fence_wait_next_locked(struct radeon_device *rdev, int ring) * wait. */ seq = atomic64_read(&rdev->fence_drv[ring].last_seq) + 1ULL; - if (seq >= rdev->fence_drv[ring].seq) { + if (seq >= rdev->fence_drv[ring].sync_seq[ring]) { /* nothing to wait for, last_seq is already the last emited fence */ return -ENOENT; @@ -464,7 +464,7 @@ int radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring) * activity can be scheduled so there won't be concurrent access * to seq value. */ - return radeon_fence_wait_seq(rdev, rdev->fence_drv[ring].seq, + return radeon_fence_wait_seq(rdev, rdev->fence_drv[ring].sync_seq[ring], ring, false, false); }
@@ -492,7 +492,8 @@ unsigned radeon_fence_count_emitted(struct radeon_device *rdev, int ring) * but it's ok to report slightly wrong fence count here. */ radeon_fence_process(rdev, ring); - emitted = rdev->fence_drv[ring].seq - atomic64_read(&rdev->fence_drv[ring].last_seq); + emitted = rdev->fence_drv[ring].sync_seq[ring] + - atomic64_read(&rdev->fence_drv[ring].last_seq); /* to avoid 32bits warp around */ if (emitted > 0x10000000) { emitted = 0x10000000; @@ -500,6 +501,51 @@ unsigned radeon_fence_count_emitted(struct radeon_device *rdev, int ring) return (unsigned)emitted; }
+bool radeon_fence_need_sync(struct radeon_fence *fence, int dst_ring) +{ + struct radeon_fence_driver *fdrv; + + if (!fence) { + return false; + } + + if (fence->ring == dst_ring) { + return false; + } + + /* we are protected by the ring mutex */ + fdrv = &fence->rdev->fence_drv[dst_ring]; + if (fence->seq <= fdrv->sync_seq[fence->ring]) { + return false; + } + + return true; +} + +void radeon_fence_note_sync(struct radeon_fence *fence, int dst_ring) +{ + struct radeon_fence_driver *dst, *src; + unsigned i; + + if (!fence) { + return; + } + + if (fence->ring == dst_ring) { + return; + } + + /* we are protected by the ring mutex */ + src = &fence->rdev->fence_drv[fence->ring]; + dst = &fence->rdev->fence_drv[dst_ring]; + for (i = 0; i < RADEON_NUM_RINGS; ++i) { + if (i == dst_ring) { + continue; + } + dst->sync_seq[i] = max(dst->sync_seq[i], src->sync_seq[i]); + } +} + int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring) { uint64_t index; @@ -521,7 +567,7 @@ int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring) } rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4]; rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr + index; - radeon_fence_write(rdev, rdev->fence_drv[ring].seq, ring); + radeon_fence_write(rdev, rdev->fence_drv[ring].sync_seq[ring], ring); rdev->fence_drv[ring].initialized = true; dev_info(rdev->dev, "fence driver on ring %d use gpu addr 0x%016llx and cpu addr 0x%p\n", ring, rdev->fence_drv[ring].gpu_addr, rdev->fence_drv[ring].cpu_addr); @@ -530,10 +576,13 @@ int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring)
static void radeon_fence_driver_init_ring(struct radeon_device *rdev, int ring) { + int i; + rdev->fence_drv[ring].scratch_reg = -1; rdev->fence_drv[ring].cpu_addr = NULL; rdev->fence_drv[ring].gpu_addr = 0; - rdev->fence_drv[ring].seq = 0; + for (i = 0; i < RADEON_NUM_RINGS; ++i) + rdev->fence_drv[ring].sync_seq[i] = 0; atomic64_set(&rdev->fence_drv[ring].last_seq, 0); rdev->fence_drv[ring].last_activity = jiffies; rdev->fence_drv[ring].initialized = false; @@ -579,7 +628,7 @@ static int radeon_debugfs_fence_info(struct seq_file *m, void *data) struct drm_info_node *node = (struct drm_info_node *)m->private; struct drm_device *dev = node->minor->dev; struct radeon_device *rdev = dev->dev_private; - int i; + int i, j;
for (i = 0; i < RADEON_NUM_RINGS; ++i) { if (!rdev->fence_drv[i].initialized) @@ -588,8 +637,14 @@ static int radeon_debugfs_fence_info(struct seq_file *m, void *data) seq_printf(m, "--- ring %d ---\n", i); seq_printf(m, "Last signaled fence 0x%016llx\n", (unsigned long long)atomic64_read(&rdev->fence_drv[i].last_seq)); - seq_printf(m, "Last emitted 0x%016llx\n", - rdev->fence_drv[i].seq); + seq_printf(m, "Last emitted 0x%016llx\n", + rdev->fence_drv[i].sync_seq[i]); + + for (j = 0; j < RADEON_NUM_RINGS; ++j) { + if (i != j && rdev->fence_drv[j].initialized) + seq_printf(m, "Last sync to ring %d 0x%016llx\n", + j, rdev->fence_drv[i].sync_seq[j]); + } } return 0; }
On Thu, May 24, 2012 at 09:49:06AM +0200, Christian König wrote:
Signed-off-by: Christian König deathsimple@vodafone.de
Need a small improvement see below, otherwise
Reviewed-by: Jerome Glisse jglisse@redhat.com
drivers/gpu/drm/radeon/radeon.h | 23 ++++++++++- drivers/gpu/drm/radeon/radeon_fence.c | 73 +++++++++++++++++++++++++++++---- 2 files changed, 85 insertions(+), 11 deletions(-)
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 5e259b4..4e232c3 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -257,8 +257,8 @@ struct radeon_fence_driver { uint32_t scratch_reg; uint64_t gpu_addr; volatile uint32_t *cpu_addr;
- /* seq is protected by ring emission lock */
- uint64_t seq;
- /* sync_seq is protected by ring emission lock */
- uint64_t sync_seq[RADEON_NUM_RINGS]; atomic64_t last_seq; unsigned long last_activity; bool initialized;
@@ -288,6 +288,25 @@ int radeon_fence_wait_any(struct radeon_device *rdev, struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence); void radeon_fence_unref(struct radeon_fence **fence); unsigned radeon_fence_count_emitted(struct radeon_device *rdev, int ring); +bool radeon_fence_need_sync(struct radeon_fence *fence, int ring); +void radeon_fence_note_sync(struct radeon_fence *fence, int ring); +static inline struct radeon_fence *radeon_fence_later(struct radeon_fence *a,
struct radeon_fence *b)
+{
- if (!a) {
return b;
- }
- if (!b) {
return a;
- }
Please add : BUG_ON(a->ring != b->ring);
So we can catch if someone badly use this function.
- if (a->seq > b->seq) {
return a;
- } else {
return b;
- }
+}
/*
- Tiling registers
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c index 401d346..7b55625 100644 --- a/drivers/gpu/drm/radeon/radeon_fence.c +++ b/drivers/gpu/drm/radeon/radeon_fence.c @@ -72,7 +72,7 @@ int radeon_fence_emit(struct radeon_device *rdev, } kref_init(&((*fence)->kref)); (*fence)->rdev = rdev;
- (*fence)->seq = ++rdev->fence_drv[ring].seq;
- (*fence)->seq = ++rdev->fence_drv[ring].sync_seq[ring]; (*fence)->ring = ring; radeon_fence_ring_emit(rdev, ring, *fence); trace_radeon_fence_emit(rdev->ddev, (*fence)->seq);
@@ -449,7 +449,7 @@ int radeon_fence_wait_next_locked(struct radeon_device *rdev, int ring) * wait. */ seq = atomic64_read(&rdev->fence_drv[ring].last_seq) + 1ULL;
- if (seq >= rdev->fence_drv[ring].seq) {
- if (seq >= rdev->fence_drv[ring].sync_seq[ring]) { /* nothing to wait for, last_seq is already the last emited fence */ return -ENOENT;
@@ -464,7 +464,7 @@ int radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring) * activity can be scheduled so there won't be concurrent access * to seq value. */
- return radeon_fence_wait_seq(rdev, rdev->fence_drv[ring].seq,
- return radeon_fence_wait_seq(rdev, rdev->fence_drv[ring].sync_seq[ring], ring, false, false);
}
@@ -492,7 +492,8 @@ unsigned radeon_fence_count_emitted(struct radeon_device *rdev, int ring) * but it's ok to report slightly wrong fence count here. */ radeon_fence_process(rdev, ring);
- emitted = rdev->fence_drv[ring].seq - atomic64_read(&rdev->fence_drv[ring].last_seq);
- emitted = rdev->fence_drv[ring].sync_seq[ring]
/* to avoid 32bits warp around */ if (emitted > 0x10000000) { emitted = 0x10000000;- atomic64_read(&rdev->fence_drv[ring].last_seq);
@@ -500,6 +501,51 @@ unsigned radeon_fence_count_emitted(struct radeon_device *rdev, int ring) return (unsigned)emitted; }
+bool radeon_fence_need_sync(struct radeon_fence *fence, int dst_ring) +{
- struct radeon_fence_driver *fdrv;
- if (!fence) {
return false;
- }
- if (fence->ring == dst_ring) {
return false;
- }
- /* we are protected by the ring mutex */
- fdrv = &fence->rdev->fence_drv[dst_ring];
- if (fence->seq <= fdrv->sync_seq[fence->ring]) {
return false;
- }
- return true;
+}
+void radeon_fence_note_sync(struct radeon_fence *fence, int dst_ring) +{
- struct radeon_fence_driver *dst, *src;
- unsigned i;
- if (!fence) {
return;
- }
- if (fence->ring == dst_ring) {
return;
- }
- /* we are protected by the ring mutex */
- src = &fence->rdev->fence_drv[fence->ring];
- dst = &fence->rdev->fence_drv[dst_ring];
- for (i = 0; i < RADEON_NUM_RINGS; ++i) {
if (i == dst_ring) {
continue;
}
dst->sync_seq[i] = max(dst->sync_seq[i], src->sync_seq[i]);
- }
+}
int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring) { uint64_t index; @@ -521,7 +567,7 @@ int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring) } rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4]; rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr + index;
- radeon_fence_write(rdev, rdev->fence_drv[ring].seq, ring);
- radeon_fence_write(rdev, rdev->fence_drv[ring].sync_seq[ring], ring); rdev->fence_drv[ring].initialized = true; dev_info(rdev->dev, "fence driver on ring %d use gpu addr 0x%016llx and cpu addr 0x%p\n", ring, rdev->fence_drv[ring].gpu_addr, rdev->fence_drv[ring].cpu_addr);
@@ -530,10 +576,13 @@ int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring)
static void radeon_fence_driver_init_ring(struct radeon_device *rdev, int ring) {
- int i;
- rdev->fence_drv[ring].scratch_reg = -1; rdev->fence_drv[ring].cpu_addr = NULL; rdev->fence_drv[ring].gpu_addr = 0;
- rdev->fence_drv[ring].seq = 0;
- for (i = 0; i < RADEON_NUM_RINGS; ++i)
atomic64_set(&rdev->fence_drv[ring].last_seq, 0); rdev->fence_drv[ring].last_activity = jiffies; rdev->fence_drv[ring].initialized = false;rdev->fence_drv[ring].sync_seq[i] = 0;
@@ -579,7 +628,7 @@ static int radeon_debugfs_fence_info(struct seq_file *m, void *data) struct drm_info_node *node = (struct drm_info_node *)m->private; struct drm_device *dev = node->minor->dev; struct radeon_device *rdev = dev->dev_private;
- int i;
int i, j;
for (i = 0; i < RADEON_NUM_RINGS; ++i) { if (!rdev->fence_drv[i].initialized)
@@ -588,8 +637,14 @@ static int radeon_debugfs_fence_info(struct seq_file *m, void *data) seq_printf(m, "--- ring %d ---\n", i); seq_printf(m, "Last signaled fence 0x%016llx\n", (unsigned long long)atomic64_read(&rdev->fence_drv[i].last_seq));
seq_printf(m, "Last emitted 0x%016llx\n",
rdev->fence_drv[i].seq);
seq_printf(m, "Last emitted 0x%016llx\n",
rdev->fence_drv[i].sync_seq[i]);
for (j = 0; j < RADEON_NUM_RINGS; ++j) {
if (i != j && rdev->fence_drv[j].initialized)
seq_printf(m, "Last sync to ring %d 0x%016llx\n",
j, rdev->fence_drv[i].sync_seq[j]);
} return 0;}
}
1.7.9.5
Move inter ring syncing with semaphores into the existing ring allocations, with that we need to lock the ring mutex only once.
Signed-off-by: Christian König deathsimple@vodafone.de --- drivers/gpu/drm/radeon/evergreen_blit_kms.c | 3 +- drivers/gpu/drm/radeon/r600.c | 5 +- drivers/gpu/drm/radeon/r600_blit_kms.c | 24 +++++++-- drivers/gpu/drm/radeon/radeon.h | 6 +-- drivers/gpu/drm/radeon/radeon_asic.h | 5 +- drivers/gpu/drm/radeon/radeon_cs.c | 38 +++----------- drivers/gpu/drm/radeon/radeon_ring.c | 30 +++++++++-- drivers/gpu/drm/radeon/radeon_semaphore.c | 71 ++++++++++----------------- drivers/gpu/drm/radeon/radeon_test.c | 6 +-- drivers/gpu/drm/radeon/radeon_ttm.c | 20 -------- 10 files changed, 92 insertions(+), 116 deletions(-)
diff --git a/drivers/gpu/drm/radeon/evergreen_blit_kms.c b/drivers/gpu/drm/radeon/evergreen_blit_kms.c index 1e96bd4..e512560 100644 --- a/drivers/gpu/drm/radeon/evergreen_blit_kms.c +++ b/drivers/gpu/drm/radeon/evergreen_blit_kms.c @@ -622,7 +622,8 @@ int evergreen_blit_init(struct radeon_device *rdev) rdev->r600_blit.primitives.draw_auto = draw_auto; rdev->r600_blit.primitives.set_default_state = set_default_state;
- rdev->r600_blit.ring_size_common = 55; /* shaders + def state */ + rdev->r600_blit.ring_size_common = 8; /* sync semaphore */ + rdev->r600_blit.ring_size_common += 55; /* shaders + def state */ rdev->r600_blit.ring_size_common += 16; /* fence emit for VB IB */ rdev->r600_blit.ring_size_common += 5; /* done copy */ rdev->r600_blit.ring_size_common += 16; /* fence emit for done copy */ diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index e5279f9..a8d8c44 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -2371,15 +2371,16 @@ int r600_copy_blit(struct radeon_device *rdev, unsigned num_gpu_pages, struct radeon_fence **fence) { + struct radeon_semaphore *sem = NULL; struct radeon_sa_bo *vb = NULL; int r;
- r = r600_blit_prepare_copy(rdev, num_gpu_pages, &vb); + r = r600_blit_prepare_copy(rdev, num_gpu_pages, fence, &vb, &sem); if (r) { return r; } r600_kms_blit_copy(rdev, src_offset, dst_offset, num_gpu_pages, vb); - r600_blit_done_copy(rdev, fence, vb); + r600_blit_done_copy(rdev, fence, vb, sem); return 0; }
diff --git a/drivers/gpu/drm/radeon/r600_blit_kms.c b/drivers/gpu/drm/radeon/r600_blit_kms.c index 02f4eeb..2b8d641 100644 --- a/drivers/gpu/drm/radeon/r600_blit_kms.c +++ b/drivers/gpu/drm/radeon/r600_blit_kms.c @@ -512,7 +512,8 @@ int r600_blit_init(struct radeon_device *rdev) rdev->r600_blit.primitives.draw_auto = draw_auto; rdev->r600_blit.primitives.set_default_state = set_default_state;
- rdev->r600_blit.ring_size_common = 40; /* shaders + def state */ + rdev->r600_blit.ring_size_common = 8; /* sync semaphore */ + rdev->r600_blit.ring_size_common += 40; /* shaders + def state */ rdev->r600_blit.ring_size_common += 5; /* done copy */ rdev->r600_blit.ring_size_common += 16; /* fence emit for done copy */
@@ -666,7 +667,8 @@ static unsigned r600_blit_create_rect(unsigned num_gpu_pages,
int r600_blit_prepare_copy(struct radeon_device *rdev, unsigned num_gpu_pages, - struct radeon_sa_bo **vb) + struct radeon_fence **fence, struct radeon_sa_bo **vb, + struct radeon_semaphore **sem) { struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; int r; @@ -689,22 +691,37 @@ int r600_blit_prepare_copy(struct radeon_device *rdev, unsigned num_gpu_pages, return r; }
+ r = radeon_semaphore_create(rdev, sem); + if (r) { + radeon_sa_bo_free(rdev, vb, NULL); + return r; + } + /* calculate number of loops correctly */ ring_size = num_loops * dwords_per_loop; ring_size += rdev->r600_blit.ring_size_common; r = radeon_ring_lock(rdev, ring, ring_size); if (r) { radeon_sa_bo_free(rdev, vb, NULL); + radeon_semaphore_free(rdev, sem, NULL); return r; }
+ if (radeon_fence_need_sync(*fence, RADEON_RING_TYPE_GFX_INDEX)) { + radeon_semaphore_sync_rings(rdev, *sem, (*fence)->ring, + RADEON_RING_TYPE_GFX_INDEX); + radeon_fence_note_sync(*fence, RADEON_RING_TYPE_GFX_INDEX); + } else { + radeon_semaphore_free(rdev, sem, NULL); + } + rdev->r600_blit.primitives.set_default_state(rdev); rdev->r600_blit.primitives.set_shaders(rdev); return 0; }
void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence **fence, - struct radeon_sa_bo *vb) + struct radeon_sa_bo *vb, struct radeon_semaphore *sem) { struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; int r; @@ -717,6 +734,7 @@ void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence **fence
radeon_ring_unlock_commit(rdev, ring); radeon_sa_bo_free(rdev, &vb, *fence); + radeon_semaphore_free(rdev, &sem, *fence); }
void r600_kms_blit_copy(struct radeon_device *rdev, diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 4e232c3..aebaf28 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -465,10 +465,9 @@ void radeon_semaphore_emit_wait(struct radeon_device *rdev, int ring, struct radeon_semaphore *semaphore); int radeon_semaphore_sync_rings(struct radeon_device *rdev, struct radeon_semaphore *semaphore, - bool sync_to[RADEON_NUM_RINGS], - int dst_ring); + int signaler, int waiter); void radeon_semaphore_free(struct radeon_device *rdev, - struct radeon_semaphore *semaphore, + struct radeon_semaphore **semaphore, struct radeon_fence *fence);
/* @@ -648,6 +647,7 @@ struct radeon_ib { struct radeon_fence *fence; unsigned vm_id; bool is_const_ib; + struct radeon_fence *sync_to[RADEON_NUM_RINGS]; struct radeon_semaphore *semaphore; };
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index 8cdf075..94c427a 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -363,9 +363,10 @@ int r600_hdmi_buffer_status_changed(struct drm_encoder *encoder); void r600_hdmi_update_audio_settings(struct drm_encoder *encoder); /* r600 blit */ int r600_blit_prepare_copy(struct radeon_device *rdev, unsigned num_gpu_pages, - struct radeon_sa_bo **vb); + struct radeon_fence **fence, struct radeon_sa_bo **vb, + struct radeon_semaphore **sem); void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence **fence, - struct radeon_sa_bo *vb); + struct radeon_sa_bo *vb, struct radeon_semaphore *sem); void r600_kms_blit_copy(struct radeon_device *rdev, u64 src_gpu_addr, u64 dst_gpu_addr, unsigned num_gpu_pages, diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index c7d64a7..d295821 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -115,36 +115,20 @@ static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority return 0; }
-static int radeon_cs_sync_rings(struct radeon_cs_parser *p) +static void radeon_cs_sync_rings(struct radeon_cs_parser *p) { - bool sync_to_ring[RADEON_NUM_RINGS] = { }; - bool need_sync = false; - int i, r; + int i;
for (i = 0; i < p->nrelocs; i++) { - struct radeon_fence *fence; + struct radeon_fence *a, *b;
if (!p->relocs[i].robj || !p->relocs[i].robj->tbo.sync_obj) continue;
- fence = p->relocs[i].robj->tbo.sync_obj; - if (fence->ring != p->ring && !radeon_fence_signaled(fence)) { - sync_to_ring[fence->ring] = true; - need_sync = true; - } - } - - if (!need_sync) { - return 0; - } - - r = radeon_semaphore_create(p->rdev, &p->ib.semaphore); - if (r) { - return r; + a = p->relocs[i].robj->tbo.sync_obj; + b = p->ib.sync_to[a->ring]; + p->ib.sync_to[a->ring] = radeon_fence_later(a, b); } - - return radeon_semaphore_sync_rings(p->rdev, p->ib.semaphore, - sync_to_ring, p->ring); }
int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data) @@ -365,10 +349,7 @@ static int radeon_cs_ib_chunk(struct radeon_device *rdev, DRM_ERROR("Invalid command stream !\n"); return r; } - r = radeon_cs_sync_rings(parser); - if (r) { - DRM_ERROR("Failed to synchronize rings !\n"); - } + radeon_cs_sync_rings(parser); parser->ib.vm_id = 0; r = radeon_ib_schedule(rdev, &parser->ib); if (r) { @@ -465,10 +446,7 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev, if (r) { goto out; } - r = radeon_cs_sync_rings(parser); - if (r) { - DRM_ERROR("Failed to synchronize rings !\n"); - } + radeon_cs_sync_rings(parser);
if ((rdev->family >= CHIP_TAHITI) && (parser->chunk_const_ib_idx != -1)) { diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index a0b9970..87f66c5 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -67,7 +67,7 @@ u32 radeon_get_ib_value(struct radeon_cs_parser *p, int idx) int radeon_ib_get(struct radeon_device *rdev, int ring, struct radeon_ib *ib, unsigned size) { - int r; + int i, r;
r = radeon_sa_bo_new(rdev, &rdev->ring_tmp_bo, &ib->sa_bo, size, 256, true); if (r) { @@ -75,20 +75,26 @@ int radeon_ib_get(struct radeon_device *rdev, int ring, return r; }
+ r = radeon_semaphore_create(rdev, &ib->semaphore); + if (r) { + return r; + } + ib->ring = ring; ib->fence = NULL; ib->ptr = radeon_sa_bo_cpu_addr(ib->sa_bo); ib->gpu_addr = radeon_sa_bo_gpu_addr(ib->sa_bo); ib->vm_id = 0; ib->is_const_ib = false; - ib->semaphore = NULL; + for (i = 0; i < RADEON_NUM_RINGS; ++i) + ib->sync_to[i] = NULL;
return 0; }
void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib *ib) { - radeon_semaphore_free(rdev, ib->semaphore, ib->fence); + radeon_semaphore_free(rdev, &ib->semaphore, ib->fence); radeon_sa_bo_free(rdev, &ib->sa_bo, ib->fence); radeon_fence_unref(&ib->fence); } @@ -96,7 +102,8 @@ void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib *ib) int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib) { struct radeon_ring *ring = &rdev->ring[ib->ring]; - int r = 0; + bool need_sync = false; + int i, r = 0;
if (!ib->length_dw || !ring->ready) { /* TODO: Nothings in the ib we should report. */ @@ -105,11 +112,24 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib) }
/* 64 dwords should be enough for fence too */ - r = radeon_ring_lock(rdev, ring, 64); + r = radeon_ring_lock(rdev, ring, 64 + RADEON_NUM_RINGS * 8); if (r) { dev_err(rdev->dev, "scheduling IB failed (%d).\n", r); return r; } + for (i = 0; i < RADEON_NUM_RINGS; ++i) { + struct radeon_fence *fence = ib->sync_to[i]; + if (radeon_fence_need_sync(fence, ib->ring)) { + need_sync = true; + radeon_semaphore_sync_rings(rdev, ib->semaphore, + fence->ring, ib->ring); + radeon_fence_note_sync(fence, ib->ring); + } + } + /* immediately free semaphore when we don't need to sync */ + if (!need_sync) { + radeon_semaphore_free(rdev, &ib->semaphore, NULL); + } radeon_ring_ib_execute(rdev, ib->ring, ib); r = radeon_fence_emit(rdev, &ib->fence, ib->ring); if (r) { diff --git a/drivers/gpu/drm/radeon/radeon_semaphore.c b/drivers/gpu/drm/radeon/radeon_semaphore.c index e2ace5d..7cc78de 100644 --- a/drivers/gpu/drm/radeon/radeon_semaphore.c +++ b/drivers/gpu/drm/radeon/radeon_semaphore.c @@ -68,70 +68,49 @@ void radeon_semaphore_emit_wait(struct radeon_device *rdev, int ring, radeon_semaphore_ring_emit(rdev, ring, &rdev->ring[ring], semaphore, true); }
+/* caller must hold ring lock */ int radeon_semaphore_sync_rings(struct radeon_device *rdev, struct radeon_semaphore *semaphore, - bool sync_to[RADEON_NUM_RINGS], - int dst_ring) + int signaler, int waiter) { - int i = 0, r; + int r;
- mutex_lock(&rdev->ring_lock); - r = radeon_ring_alloc(rdev, &rdev->ring[dst_ring], RADEON_NUM_RINGS * 8); - if (r) { - goto error; + /* no need to signal and wait on the same ring */ + if (signaler == waiter) { + return 0; }
- for (i = 0; i < RADEON_NUM_RINGS; ++i) { - /* no need to sync to our own or unused rings */ - if (!sync_to[i] || i == dst_ring) - continue; - - /* prevent GPU deadlocks */ - if (!rdev->ring[i].ready) { - dev_err(rdev->dev, "Trying to sync to a disabled ring!"); - r = -EINVAL; - goto error; - } - - r = radeon_ring_alloc(rdev, &rdev->ring[i], 8); - if (r) { - goto error; - } - - radeon_semaphore_emit_signal(rdev, i, semaphore); - radeon_semaphore_emit_wait(rdev, dst_ring, semaphore); + /* prevent GPU deadlocks */ + if (!rdev->ring[signaler].ready) { + dev_err(rdev->dev, "Trying to sync to a disabled ring!"); + return -EINVAL; + }
- radeon_ring_commit(rdev, &rdev->ring[i]); + r = radeon_ring_alloc(rdev, &rdev->ring[signaler], 8); + if (r) { + return r; } + radeon_semaphore_emit_signal(rdev, signaler, semaphore); + radeon_ring_commit(rdev, &rdev->ring[signaler]);
- radeon_ring_commit(rdev, &rdev->ring[dst_ring]); - mutex_unlock(&rdev->ring_lock); + /* we assume caller has already allocated space on waiters ring */ + radeon_semaphore_emit_wait(rdev, waiter, semaphore);
return 0; - -error: - /* unlock all locks taken so far */ - for (--i; i >= 0; --i) { - if (sync_to[i] || i == dst_ring) { - radeon_ring_undo(&rdev->ring[i]); - } - } - radeon_ring_undo(&rdev->ring[dst_ring]); - mutex_unlock(&rdev->ring_lock); - return r; }
void radeon_semaphore_free(struct radeon_device *rdev, - struct radeon_semaphore *semaphore, + struct radeon_semaphore **semaphore, struct radeon_fence *fence) { - if (semaphore == NULL) { + if (semaphore == NULL || *semaphore == NULL) { return; } - if (semaphore->waiters > 0) { + if ((*semaphore)->waiters > 0) { dev_err(rdev->dev, "semaphore %p has more waiters than signalers," - " hardware lockup imminent!\n", semaphore); + " hardware lockup imminent!\n", *semaphore); } - radeon_sa_bo_free(rdev, &semaphore->sa_bo, fence); - kfree(semaphore); + radeon_sa_bo_free(rdev, &(*semaphore)->sa_bo, fence); + kfree(*semaphore); + *semaphore = NULL; } diff --git a/drivers/gpu/drm/radeon/radeon_test.c b/drivers/gpu/drm/radeon/radeon_test.c index 47e1535..a94f66f 100644 --- a/drivers/gpu/drm/radeon/radeon_test.c +++ b/drivers/gpu/drm/radeon/radeon_test.c @@ -303,8 +303,7 @@ void radeon_test_ring_sync(struct radeon_device *rdev, }
out_cleanup: - if (semaphore) - radeon_semaphore_free(rdev, semaphore, NULL); + radeon_semaphore_free(rdev, &semaphore, NULL);
if (fence1) radeon_fence_unref(&fence1); @@ -422,8 +421,7 @@ void radeon_test_ring_sync2(struct radeon_device *rdev, }
out_cleanup: - if (semaphore) - radeon_semaphore_free(rdev, semaphore, NULL); + radeon_semaphore_free(rdev, &semaphore, NULL);
if (fenceA) radeon_fence_unref(&fenceA); diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index 2d36bdd..c43035c 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -223,7 +223,6 @@ static int radeon_move_blit(struct ttm_buffer_object *bo, struct radeon_device *rdev; uint64_t old_start, new_start; struct radeon_fence *fence; - struct radeon_semaphore *sem = NULL; int r, ridx;
rdev = radeon_get_rdev(bo->bdev); @@ -262,31 +261,12 @@ static int radeon_move_blit(struct ttm_buffer_object *bo,
/* sync other rings */ fence = bo->sync_obj; - if (fence && fence->ring != ridx - && !radeon_fence_signaled(fence)) { - bool sync_to_ring[RADEON_NUM_RINGS] = { }; - sync_to_ring[fence->ring] = true; - - r = radeon_semaphore_create(rdev, &sem); - if (r) { - return r; - } - - r = radeon_semaphore_sync_rings(rdev, sem, sync_to_ring, ridx); - if (r) { - radeon_semaphore_free(rdev, sem, NULL); - return r; - } - } - - fence = NULL; r = radeon_copy(rdev, old_start, new_start, new_mem->num_pages * (PAGE_SIZE / RADEON_GPU_PAGE_SIZE), /* GPU pages */ &fence); /* FIXME: handle copy error */ r = ttm_bo_move_accel_cleanup(bo, (void *)fence, NULL, evict, no_wait_reserve, no_wait_gpu, new_mem); - radeon_semaphore_free(rdev, sem, fence); radeon_fence_unref(&fence); return r; }
On Thu, May 24, 2012 at 09:49:07AM +0200, Christian König wrote:
Move inter ring syncing with semaphores into the existing ring allocations, with that we need to lock the ring mutex only once.
Signed-off-by: Christian König deathsimple@vodafone.de
Reviewed-by: Jerome Glisse jglisse@redhat.com
drivers/gpu/drm/radeon/evergreen_blit_kms.c | 3 +- drivers/gpu/drm/radeon/r600.c | 5 +- drivers/gpu/drm/radeon/r600_blit_kms.c | 24 +++++++-- drivers/gpu/drm/radeon/radeon.h | 6 +-- drivers/gpu/drm/radeon/radeon_asic.h | 5 +- drivers/gpu/drm/radeon/radeon_cs.c | 38 +++----------- drivers/gpu/drm/radeon/radeon_ring.c | 30 +++++++++-- drivers/gpu/drm/radeon/radeon_semaphore.c | 71 ++++++++++----------------- drivers/gpu/drm/radeon/radeon_test.c | 6 +-- drivers/gpu/drm/radeon/radeon_ttm.c | 20 -------- 10 files changed, 92 insertions(+), 116 deletions(-)
diff --git a/drivers/gpu/drm/radeon/evergreen_blit_kms.c b/drivers/gpu/drm/radeon/evergreen_blit_kms.c index 1e96bd4..e512560 100644 --- a/drivers/gpu/drm/radeon/evergreen_blit_kms.c +++ b/drivers/gpu/drm/radeon/evergreen_blit_kms.c @@ -622,7 +622,8 @@ int evergreen_blit_init(struct radeon_device *rdev) rdev->r600_blit.primitives.draw_auto = draw_auto; rdev->r600_blit.primitives.set_default_state = set_default_state;
- rdev->r600_blit.ring_size_common = 55; /* shaders + def state */
- rdev->r600_blit.ring_size_common = 8; /* sync semaphore */
- rdev->r600_blit.ring_size_common += 55; /* shaders + def state */ rdev->r600_blit.ring_size_common += 16; /* fence emit for VB IB */ rdev->r600_blit.ring_size_common += 5; /* done copy */ rdev->r600_blit.ring_size_common += 16; /* fence emit for done copy */
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index e5279f9..a8d8c44 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -2371,15 +2371,16 @@ int r600_copy_blit(struct radeon_device *rdev, unsigned num_gpu_pages, struct radeon_fence **fence) {
- struct radeon_semaphore *sem = NULL; struct radeon_sa_bo *vb = NULL; int r;
- r = r600_blit_prepare_copy(rdev, num_gpu_pages, &vb);
- r = r600_blit_prepare_copy(rdev, num_gpu_pages, fence, &vb, &sem); if (r) { return r; } r600_kms_blit_copy(rdev, src_offset, dst_offset, num_gpu_pages, vb);
- r600_blit_done_copy(rdev, fence, vb);
- r600_blit_done_copy(rdev, fence, vb, sem); return 0;
}
diff --git a/drivers/gpu/drm/radeon/r600_blit_kms.c b/drivers/gpu/drm/radeon/r600_blit_kms.c index 02f4eeb..2b8d641 100644 --- a/drivers/gpu/drm/radeon/r600_blit_kms.c +++ b/drivers/gpu/drm/radeon/r600_blit_kms.c @@ -512,7 +512,8 @@ int r600_blit_init(struct radeon_device *rdev) rdev->r600_blit.primitives.draw_auto = draw_auto; rdev->r600_blit.primitives.set_default_state = set_default_state;
- rdev->r600_blit.ring_size_common = 40; /* shaders + def state */
- rdev->r600_blit.ring_size_common = 8; /* sync semaphore */
- rdev->r600_blit.ring_size_common += 40; /* shaders + def state */ rdev->r600_blit.ring_size_common += 5; /* done copy */ rdev->r600_blit.ring_size_common += 16; /* fence emit for done copy */
@@ -666,7 +667,8 @@ static unsigned r600_blit_create_rect(unsigned num_gpu_pages,
int r600_blit_prepare_copy(struct radeon_device *rdev, unsigned num_gpu_pages,
struct radeon_sa_bo **vb)
struct radeon_fence **fence, struct radeon_sa_bo **vb,
struct radeon_semaphore **sem)
{ struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; int r; @@ -689,22 +691,37 @@ int r600_blit_prepare_copy(struct radeon_device *rdev, unsigned num_gpu_pages, return r; }
r = radeon_semaphore_create(rdev, sem);
if (r) {
radeon_sa_bo_free(rdev, vb, NULL);
return r;
}
/* calculate number of loops correctly */ ring_size = num_loops * dwords_per_loop; ring_size += rdev->r600_blit.ring_size_common; r = radeon_ring_lock(rdev, ring, ring_size); if (r) { radeon_sa_bo_free(rdev, vb, NULL);
radeon_semaphore_free(rdev, sem, NULL);
return r; }
if (radeon_fence_need_sync(*fence, RADEON_RING_TYPE_GFX_INDEX)) {
radeon_semaphore_sync_rings(rdev, *sem, (*fence)->ring,
RADEON_RING_TYPE_GFX_INDEX);
radeon_fence_note_sync(*fence, RADEON_RING_TYPE_GFX_INDEX);
} else {
radeon_semaphore_free(rdev, sem, NULL);
}
rdev->r600_blit.primitives.set_default_state(rdev); rdev->r600_blit.primitives.set_shaders(rdev); return 0;
}
void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence **fence,
struct radeon_sa_bo *vb)
struct radeon_sa_bo *vb, struct radeon_semaphore *sem)
{ struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; int r; @@ -717,6 +734,7 @@ void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence **fence
radeon_ring_unlock_commit(rdev, ring); radeon_sa_bo_free(rdev, &vb, *fence);
- radeon_semaphore_free(rdev, &sem, *fence);
}
void r600_kms_blit_copy(struct radeon_device *rdev, diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 4e232c3..aebaf28 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -465,10 +465,9 @@ void radeon_semaphore_emit_wait(struct radeon_device *rdev, int ring, struct radeon_semaphore *semaphore); int radeon_semaphore_sync_rings(struct radeon_device *rdev, struct radeon_semaphore *semaphore,
bool sync_to[RADEON_NUM_RINGS],
int dst_ring);
int signaler, int waiter);
void radeon_semaphore_free(struct radeon_device *rdev,
struct radeon_semaphore *semaphore,
struct radeon_semaphore **semaphore, struct radeon_fence *fence);
/* @@ -648,6 +647,7 @@ struct radeon_ib { struct radeon_fence *fence; unsigned vm_id; bool is_const_ib;
- struct radeon_fence *sync_to[RADEON_NUM_RINGS]; struct radeon_semaphore *semaphore;
};
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index 8cdf075..94c427a 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -363,9 +363,10 @@ int r600_hdmi_buffer_status_changed(struct drm_encoder *encoder); void r600_hdmi_update_audio_settings(struct drm_encoder *encoder); /* r600 blit */ int r600_blit_prepare_copy(struct radeon_device *rdev, unsigned num_gpu_pages,
struct radeon_sa_bo **vb);
struct radeon_fence **fence, struct radeon_sa_bo **vb,
struct radeon_semaphore **sem);
void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence **fence,
struct radeon_sa_bo *vb);
struct radeon_sa_bo *vb, struct radeon_semaphore *sem);
void r600_kms_blit_copy(struct radeon_device *rdev, u64 src_gpu_addr, u64 dst_gpu_addr, unsigned num_gpu_pages, diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index c7d64a7..d295821 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -115,36 +115,20 @@ static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority return 0; }
-static int radeon_cs_sync_rings(struct radeon_cs_parser *p) +static void radeon_cs_sync_rings(struct radeon_cs_parser *p) {
- bool sync_to_ring[RADEON_NUM_RINGS] = { };
- bool need_sync = false;
- int i, r;
int i;
for (i = 0; i < p->nrelocs; i++) {
struct radeon_fence *fence;
struct radeon_fence *a, *b;
if (!p->relocs[i].robj || !p->relocs[i].robj->tbo.sync_obj) continue;
fence = p->relocs[i].robj->tbo.sync_obj;
if (fence->ring != p->ring && !radeon_fence_signaled(fence)) {
sync_to_ring[fence->ring] = true;
need_sync = true;
}
- }
- if (!need_sync) {
return 0;
- }
- r = radeon_semaphore_create(p->rdev, &p->ib.semaphore);
- if (r) {
return r;
a = p->relocs[i].robj->tbo.sync_obj;
b = p->ib.sync_to[a->ring];
}p->ib.sync_to[a->ring] = radeon_fence_later(a, b);
- return radeon_semaphore_sync_rings(p->rdev, p->ib.semaphore,
sync_to_ring, p->ring);
}
int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data) @@ -365,10 +349,7 @@ static int radeon_cs_ib_chunk(struct radeon_device *rdev, DRM_ERROR("Invalid command stream !\n"); return r; }
- r = radeon_cs_sync_rings(parser);
- if (r) {
DRM_ERROR("Failed to synchronize rings !\n");
- }
- radeon_cs_sync_rings(parser); parser->ib.vm_id = 0; r = radeon_ib_schedule(rdev, &parser->ib); if (r) {
@@ -465,10 +446,7 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev, if (r) { goto out; }
- r = radeon_cs_sync_rings(parser);
- if (r) {
DRM_ERROR("Failed to synchronize rings !\n");
- }
radeon_cs_sync_rings(parser);
if ((rdev->family >= CHIP_TAHITI) && (parser->chunk_const_ib_idx != -1)) {
diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index a0b9970..87f66c5 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -67,7 +67,7 @@ u32 radeon_get_ib_value(struct radeon_cs_parser *p, int idx) int radeon_ib_get(struct radeon_device *rdev, int ring, struct radeon_ib *ib, unsigned size) {
- int r;
int i, r;
r = radeon_sa_bo_new(rdev, &rdev->ring_tmp_bo, &ib->sa_bo, size, 256, true); if (r) {
@@ -75,20 +75,26 @@ int radeon_ib_get(struct radeon_device *rdev, int ring, return r; }
- r = radeon_semaphore_create(rdev, &ib->semaphore);
- if (r) {
return r;
- }
- ib->ring = ring; ib->fence = NULL; ib->ptr = radeon_sa_bo_cpu_addr(ib->sa_bo); ib->gpu_addr = radeon_sa_bo_gpu_addr(ib->sa_bo); ib->vm_id = 0; ib->is_const_ib = false;
- ib->semaphore = NULL;
for (i = 0; i < RADEON_NUM_RINGS; ++i)
ib->sync_to[i] = NULL;
return 0;
}
void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib *ib) {
- radeon_semaphore_free(rdev, ib->semaphore, ib->fence);
- radeon_semaphore_free(rdev, &ib->semaphore, ib->fence); radeon_sa_bo_free(rdev, &ib->sa_bo, ib->fence); radeon_fence_unref(&ib->fence);
} @@ -96,7 +102,8 @@ void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib *ib) int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib) { struct radeon_ring *ring = &rdev->ring[ib->ring];
- int r = 0;
bool need_sync = false;
int i, r = 0;
if (!ib->length_dw || !ring->ready) { /* TODO: Nothings in the ib we should report. */
@@ -105,11 +112,24 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib) }
/* 64 dwords should be enough for fence too */
- r = radeon_ring_lock(rdev, ring, 64);
- r = radeon_ring_lock(rdev, ring, 64 + RADEON_NUM_RINGS * 8); if (r) { dev_err(rdev->dev, "scheduling IB failed (%d).\n", r); return r; }
- for (i = 0; i < RADEON_NUM_RINGS; ++i) {
struct radeon_fence *fence = ib->sync_to[i];
if (radeon_fence_need_sync(fence, ib->ring)) {
need_sync = true;
radeon_semaphore_sync_rings(rdev, ib->semaphore,
fence->ring, ib->ring);
radeon_fence_note_sync(fence, ib->ring);
}
- }
- /* immediately free semaphore when we don't need to sync */
- if (!need_sync) {
radeon_semaphore_free(rdev, &ib->semaphore, NULL);
- } radeon_ring_ib_execute(rdev, ib->ring, ib); r = radeon_fence_emit(rdev, &ib->fence, ib->ring); if (r) {
diff --git a/drivers/gpu/drm/radeon/radeon_semaphore.c b/drivers/gpu/drm/radeon/radeon_semaphore.c index e2ace5d..7cc78de 100644 --- a/drivers/gpu/drm/radeon/radeon_semaphore.c +++ b/drivers/gpu/drm/radeon/radeon_semaphore.c @@ -68,70 +68,49 @@ void radeon_semaphore_emit_wait(struct radeon_device *rdev, int ring, radeon_semaphore_ring_emit(rdev, ring, &rdev->ring[ring], semaphore, true); }
+/* caller must hold ring lock */ int radeon_semaphore_sync_rings(struct radeon_device *rdev, struct radeon_semaphore *semaphore,
bool sync_to[RADEON_NUM_RINGS],
int dst_ring)
int signaler, int waiter)
{
- int i = 0, r;
- int r;
- mutex_lock(&rdev->ring_lock);
- r = radeon_ring_alloc(rdev, &rdev->ring[dst_ring], RADEON_NUM_RINGS * 8);
- if (r) {
goto error;
- /* no need to signal and wait on the same ring */
- if (signaler == waiter) {
}return 0;
- for (i = 0; i < RADEON_NUM_RINGS; ++i) {
/* no need to sync to our own or unused rings */
if (!sync_to[i] || i == dst_ring)
continue;
/* prevent GPU deadlocks */
if (!rdev->ring[i].ready) {
dev_err(rdev->dev, "Trying to sync to a disabled ring!");
r = -EINVAL;
goto error;
}
r = radeon_ring_alloc(rdev, &rdev->ring[i], 8);
if (r) {
goto error;
}
radeon_semaphore_emit_signal(rdev, i, semaphore);
radeon_semaphore_emit_wait(rdev, dst_ring, semaphore);
- /* prevent GPU deadlocks */
- if (!rdev->ring[signaler].ready) {
dev_err(rdev->dev, "Trying to sync to a disabled ring!");
return -EINVAL;
- }
radeon_ring_commit(rdev, &rdev->ring[i]);
- r = radeon_ring_alloc(rdev, &rdev->ring[signaler], 8);
- if (r) {
}return r;
- radeon_semaphore_emit_signal(rdev, signaler, semaphore);
- radeon_ring_commit(rdev, &rdev->ring[signaler]);
- radeon_ring_commit(rdev, &rdev->ring[dst_ring]);
- mutex_unlock(&rdev->ring_lock);
/* we assume caller has already allocated space on waiters ring */
radeon_semaphore_emit_wait(rdev, waiter, semaphore);
return 0;
-error:
- /* unlock all locks taken so far */
- for (--i; i >= 0; --i) {
if (sync_to[i] || i == dst_ring) {
radeon_ring_undo(&rdev->ring[i]);
}
- }
- radeon_ring_undo(&rdev->ring[dst_ring]);
- mutex_unlock(&rdev->ring_lock);
- return r;
}
void radeon_semaphore_free(struct radeon_device *rdev,
struct radeon_semaphore *semaphore,
struct radeon_semaphore **semaphore, struct radeon_fence *fence)
{
- if (semaphore == NULL) {
- if (semaphore == NULL || *semaphore == NULL) { return; }
- if (semaphore->waiters > 0) {
- if ((*semaphore)->waiters > 0) { dev_err(rdev->dev, "semaphore %p has more waiters than signalers,"
" hardware lockup imminent!\n", semaphore);
}" hardware lockup imminent!\n", *semaphore);
- radeon_sa_bo_free(rdev, &semaphore->sa_bo, fence);
- kfree(semaphore);
- radeon_sa_bo_free(rdev, &(*semaphore)->sa_bo, fence);
- kfree(*semaphore);
- *semaphore = NULL;
} diff --git a/drivers/gpu/drm/radeon/radeon_test.c b/drivers/gpu/drm/radeon/radeon_test.c index 47e1535..a94f66f 100644 --- a/drivers/gpu/drm/radeon/radeon_test.c +++ b/drivers/gpu/drm/radeon/radeon_test.c @@ -303,8 +303,7 @@ void radeon_test_ring_sync(struct radeon_device *rdev, }
out_cleanup:
- if (semaphore)
radeon_semaphore_free(rdev, semaphore, NULL);
radeon_semaphore_free(rdev, &semaphore, NULL);
if (fence1) radeon_fence_unref(&fence1);
@@ -422,8 +421,7 @@ void radeon_test_ring_sync2(struct radeon_device *rdev, }
out_cleanup:
- if (semaphore)
radeon_semaphore_free(rdev, semaphore, NULL);
radeon_semaphore_free(rdev, &semaphore, NULL);
if (fenceA) radeon_fence_unref(&fenceA);
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index 2d36bdd..c43035c 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -223,7 +223,6 @@ static int radeon_move_blit(struct ttm_buffer_object *bo, struct radeon_device *rdev; uint64_t old_start, new_start; struct radeon_fence *fence;
struct radeon_semaphore *sem = NULL; int r, ridx;
rdev = radeon_get_rdev(bo->bdev);
@@ -262,31 +261,12 @@ static int radeon_move_blit(struct ttm_buffer_object *bo,
/* sync other rings */ fence = bo->sync_obj;
- if (fence && fence->ring != ridx
&& !radeon_fence_signaled(fence)) {
bool sync_to_ring[RADEON_NUM_RINGS] = { };
sync_to_ring[fence->ring] = true;
r = radeon_semaphore_create(rdev, &sem);
if (r) {
return r;
}
r = radeon_semaphore_sync_rings(rdev, sem, sync_to_ring, ridx);
if (r) {
radeon_semaphore_free(rdev, sem, NULL);
return r;
}
- }
- fence = NULL; r = radeon_copy(rdev, old_start, new_start, new_mem->num_pages * (PAGE_SIZE / RADEON_GPU_PAGE_SIZE), /* GPU pages */ &fence); /* FIXME: handle copy error */ r = ttm_bo_move_accel_cleanup(bo, (void *)fence, NULL, evict, no_wait_reserve, no_wait_gpu, new_mem);
- radeon_semaphore_free(rdev, sem, fence); radeon_fence_unref(&fence); return r;
}
1.7.9.5
It is a rw_semaphore now and only write locked while changing the clock. Also the lock is renamed to better reflect what it is protecting.
v2: Keep the ttm_vm_ops on IGPs
Signed-off-by: Christian König deathsimple@vodafone.de --- drivers/gpu/drm/radeon/radeon.h | 3 ++- drivers/gpu/drm/radeon/radeon_device.c | 2 +- drivers/gpu/drm/radeon/radeon_object.c | 8 ++++---- drivers/gpu/drm/radeon/radeon_pm.c | 4 ++-- drivers/gpu/drm/radeon/radeon_ttm.c | 4 ++-- 5 files changed, 11 insertions(+), 10 deletions(-)
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index aebaf28..3e83480 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -1055,6 +1055,8 @@ struct radeon_power_state {
struct radeon_pm { struct mutex mutex; + /* write locked while reprogramming mclk */ + struct rw_semaphore mclk_lock; u32 active_crtcs; int active_crtc_count; int req_vblank; @@ -1551,7 +1553,6 @@ struct radeon_device { struct work_struct audio_work; int num_crtc; /* number of crtcs */ struct mutex dc_hw_i2c_mutex; /* display controller hw i2c mutex */ - struct mutex vram_mutex; bool audio_enabled; struct r600_audio audio_status; /* audio stuff */ struct notifier_block acpi_nb; diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 066c98b..7667184 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -735,7 +735,7 @@ int radeon_device_init(struct radeon_device *rdev, spin_lock_init(&rdev->ih.lock); mutex_init(&rdev->gem.mutex); mutex_init(&rdev->pm.mutex); - mutex_init(&rdev->vram_mutex); + init_rwsem(&rdev->pm.mclk_lock); init_waitqueue_head(&rdev->irq.vblank_queue); init_waitqueue_head(&rdev->irq.idle_queue); r = radeon_gem_init(rdev); diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index 830f1a7..6ecb200 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -154,11 +154,11 @@ retry: INIT_LIST_HEAD(&bo->va); radeon_ttm_placement_from_domain(bo, domain); /* Kernel allocation are uninterruptible */ - mutex_lock(&rdev->vram_mutex); + down_read(&rdev->pm.mclk_lock); r = ttm_bo_init(&rdev->mman.bdev, &bo->tbo, size, type, &bo->placement, page_align, 0, !kernel, NULL, acc_size, sg, &radeon_ttm_bo_destroy); - mutex_unlock(&rdev->vram_mutex); + up_read(&rdev->pm.mclk_lock); if (unlikely(r != 0)) { if (r != -ERESTARTSYS) { if (domain == RADEON_GEM_DOMAIN_VRAM) { @@ -219,9 +219,9 @@ void radeon_bo_unref(struct radeon_bo **bo) return; rdev = (*bo)->rdev; tbo = &((*bo)->tbo); - mutex_lock(&rdev->vram_mutex); + down_read(&rdev->pm.mclk_lock); ttm_bo_unref(&tbo); - mutex_unlock(&rdev->vram_mutex); + up_read(&rdev->pm.mclk_lock); if (tbo == NULL) *bo = NULL; } diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c index 0882554..d13b6ae 100644 --- a/drivers/gpu/drm/radeon/radeon_pm.c +++ b/drivers/gpu/drm/radeon/radeon_pm.c @@ -251,7 +251,7 @@ static void radeon_pm_set_clocks(struct radeon_device *rdev) return;
mutex_lock(&rdev->ddev->struct_mutex); - mutex_lock(&rdev->vram_mutex); + down_write(&rdev->pm.mclk_lock); mutex_lock(&rdev->ring_lock);
/* gui idle int has issues on older chips it seems */ @@ -303,7 +303,7 @@ static void radeon_pm_set_clocks(struct radeon_device *rdev) rdev->pm.dynpm_planned_action = DYNPM_ACTION_NONE;
mutex_unlock(&rdev->ring_lock); - mutex_unlock(&rdev->vram_mutex); + up_write(&rdev->pm.mclk_lock); mutex_unlock(&rdev->ddev->struct_mutex); }
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index c43035c..0881131 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -797,9 +797,9 @@ static int radeon_ttm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) return VM_FAULT_NOPAGE; } rdev = radeon_get_rdev(bo->bdev); - mutex_lock(&rdev->vram_mutex); + down_read(&rdev->pm.mclk_lock); r = ttm_vm_ops->fault(vma, vmf); - mutex_unlock(&rdev->vram_mutex); + up_read(&rdev->pm.mclk_lock); return r; }
On Thu, May 24, 2012 at 09:49:08AM +0200, Christian König wrote:
It is a rw_semaphore now and only write locked while changing the clock. Also the lock is renamed to better reflect what it is protecting.
v2: Keep the ttm_vm_ops on IGPs
Signed-off-by: Christian König deathsimple@vodafone.de
Reviewed-by: Jerome Glisse jglisse@redhat.com
drivers/gpu/drm/radeon/radeon.h | 3 ++- drivers/gpu/drm/radeon/radeon_device.c | 2 +- drivers/gpu/drm/radeon/radeon_object.c | 8 ++++---- drivers/gpu/drm/radeon/radeon_pm.c | 4 ++-- drivers/gpu/drm/radeon/radeon_ttm.c | 4 ++-- 5 files changed, 11 insertions(+), 10 deletions(-)
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index aebaf28..3e83480 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -1055,6 +1055,8 @@ struct radeon_power_state {
struct radeon_pm { struct mutex mutex;
- /* write locked while reprogramming mclk */
- struct rw_semaphore mclk_lock; u32 active_crtcs; int active_crtc_count; int req_vblank;
@@ -1551,7 +1553,6 @@ struct radeon_device { struct work_struct audio_work; int num_crtc; /* number of crtcs */ struct mutex dc_hw_i2c_mutex; /* display controller hw i2c mutex */
- struct mutex vram_mutex; bool audio_enabled; struct r600_audio audio_status; /* audio stuff */ struct notifier_block acpi_nb;
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 066c98b..7667184 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -735,7 +735,7 @@ int radeon_device_init(struct radeon_device *rdev, spin_lock_init(&rdev->ih.lock); mutex_init(&rdev->gem.mutex); mutex_init(&rdev->pm.mutex);
- mutex_init(&rdev->vram_mutex);
- init_rwsem(&rdev->pm.mclk_lock); init_waitqueue_head(&rdev->irq.vblank_queue); init_waitqueue_head(&rdev->irq.idle_queue); r = radeon_gem_init(rdev);
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index 830f1a7..6ecb200 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -154,11 +154,11 @@ retry: INIT_LIST_HEAD(&bo->va); radeon_ttm_placement_from_domain(bo, domain); /* Kernel allocation are uninterruptible */
- mutex_lock(&rdev->vram_mutex);
- down_read(&rdev->pm.mclk_lock); r = ttm_bo_init(&rdev->mman.bdev, &bo->tbo, size, type, &bo->placement, page_align, 0, !kernel, NULL, acc_size, sg, &radeon_ttm_bo_destroy);
- mutex_unlock(&rdev->vram_mutex);
- up_read(&rdev->pm.mclk_lock); if (unlikely(r != 0)) { if (r != -ERESTARTSYS) { if (domain == RADEON_GEM_DOMAIN_VRAM) {
@@ -219,9 +219,9 @@ void radeon_bo_unref(struct radeon_bo **bo) return; rdev = (*bo)->rdev; tbo = &((*bo)->tbo);
- mutex_lock(&rdev->vram_mutex);
- down_read(&rdev->pm.mclk_lock); ttm_bo_unref(&tbo);
- mutex_unlock(&rdev->vram_mutex);
- up_read(&rdev->pm.mclk_lock); if (tbo == NULL) *bo = NULL;
} diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c index 0882554..d13b6ae 100644 --- a/drivers/gpu/drm/radeon/radeon_pm.c +++ b/drivers/gpu/drm/radeon/radeon_pm.c @@ -251,7 +251,7 @@ static void radeon_pm_set_clocks(struct radeon_device *rdev) return;
mutex_lock(&rdev->ddev->struct_mutex);
- mutex_lock(&rdev->vram_mutex);
down_write(&rdev->pm.mclk_lock); mutex_lock(&rdev->ring_lock);
/* gui idle int has issues on older chips it seems */
@@ -303,7 +303,7 @@ static void radeon_pm_set_clocks(struct radeon_device *rdev) rdev->pm.dynpm_planned_action = DYNPM_ACTION_NONE;
mutex_unlock(&rdev->ring_lock);
- mutex_unlock(&rdev->vram_mutex);
- up_write(&rdev->pm.mclk_lock); mutex_unlock(&rdev->ddev->struct_mutex);
}
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index c43035c..0881131 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -797,9 +797,9 @@ static int radeon_ttm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) return VM_FAULT_NOPAGE; } rdev = radeon_get_rdev(bo->bdev);
- mutex_lock(&rdev->vram_mutex);
- down_read(&rdev->pm.mclk_lock); r = ttm_vm_ops->fault(vma, vmf);
- mutex_unlock(&rdev->vram_mutex);
- up_read(&rdev->pm.mclk_lock); return r;
}
-- 1.7.9.5
dri-devel mailing list dri-devel@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/dri-devel
From: Christian Koenig christian.koenig@amd.com
Signed-off-by: Christian Koenig christian.koenig@amd.com --- drivers/gpu/drm/radeon/radeon.h | 2 -- 1 file changed, 2 deletions(-)
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 3e83480..618df9a 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -733,9 +733,7 @@ struct r600_ih { struct radeon_bo *ring_obj; volatile uint32_t *ring; unsigned rptr; - unsigned rptr_offs; unsigned wptr; - unsigned wptr_old; unsigned ring_size; uint64_t gpu_addr; uint32_t ptr_mask;
On Thu, May 24, 2012 at 09:49:09AM +0200, Christian König wrote:
From: Christian Koenig christian.koenig@amd.com
Signed-off-by: Christian Koenig christian.koenig@amd.com
Reviewed-by: Jerome Glisse jglisse@redhat.com
drivers/gpu/drm/radeon/radeon.h | 2 -- 1 file changed, 2 deletions(-)
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 3e83480..618df9a 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -733,9 +733,7 @@ struct r600_ih { struct radeon_bo *ring_obj; volatile uint32_t *ring; unsigned rptr;
- unsigned rptr_offs; unsigned wptr;
- unsigned wptr_old; unsigned ring_size; uint64_t gpu_addr; uint32_t ptr_mask;
-- 1.7.9.5
From: Christian Koenig christian.koenig@amd.com
The spinlock was actually there to protect the rptr, but rptr was read outside of the locked area.
Also we don't really need a spinlock here, an atomic should to quite fine since we only need to prevent it from being reentrant.
Signed-off-by: Christian Koenig christian.koenig@amd.com --- drivers/gpu/drm/radeon/evergreen.c | 29 ++++++++++++++++------------- drivers/gpu/drm/radeon/r600.c | 30 +++++++++++++++--------------- drivers/gpu/drm/radeon/radeon.h | 3 +-- drivers/gpu/drm/radeon/radeon_device.c | 3 +-- drivers/gpu/drm/radeon/si.c | 30 ++++++++++++++++-------------- 5 files changed, 49 insertions(+), 46 deletions(-)
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index dd3cea4..bfcb39e 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -2943,7 +2943,6 @@ int evergreen_irq_process(struct radeon_device *rdev) u32 rptr; u32 src_id, src_data; u32 ring_index; - unsigned long flags; bool queue_hotplug = false; bool queue_hdmi = false;
@@ -2951,22 +2950,24 @@ int evergreen_irq_process(struct radeon_device *rdev) return IRQ_NONE;
wptr = evergreen_get_ih_wptr(rdev); + +restart_ih: + /* is somebody else already processing irqs? */ + if (atomic_xchg(&rdev->ih.lock, 1)) + return IRQ_NONE; + rptr = rdev->ih.rptr; + if (rptr == wptr) + return IRQ_NONE; + DRM_DEBUG("r600_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
- spin_lock_irqsave(&rdev->ih.lock, flags); - if (rptr == wptr) { - spin_unlock_irqrestore(&rdev->ih.lock, flags); - return IRQ_NONE; - } -restart_ih: /* Order reading of wptr vs. reading of IH ring data */ rmb();
/* display interrupts */ evergreen_irq_ack(rdev);
- rdev->ih.wptr = wptr; while (rptr != wptr) { /* wptr/rptr are in bytes! */ ring_index = rptr / 4; @@ -3265,17 +3266,19 @@ restart_ih: rptr += 16; rptr &= rdev->ih.ptr_mask; } - /* make sure wptr hasn't changed while processing */ - wptr = evergreen_get_ih_wptr(rdev); - if (wptr != rdev->ih.wptr) - goto restart_ih; if (queue_hotplug) schedule_work(&rdev->hotplug_work); if (queue_hdmi) schedule_work(&rdev->audio_work); rdev->ih.rptr = rptr; WREG32(IH_RB_RPTR, rdev->ih.rptr); - spin_unlock_irqrestore(&rdev->ih.lock, flags); + atomic_set(&rdev->ih.lock, 0); + + /* make sure wptr hasn't changed while processing */ + wptr = evergreen_get_ih_wptr(rdev); + if (wptr != rptr) + goto restart_ih; + return IRQ_HANDLED; }
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index a8d8c44..eadbb06 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -2921,7 +2921,6 @@ void r600_disable_interrupts(struct radeon_device *rdev) WREG32(IH_RB_RPTR, 0); WREG32(IH_RB_WPTR, 0); rdev->ih.enabled = false; - rdev->ih.wptr = 0; rdev->ih.rptr = 0; }
@@ -3373,7 +3372,6 @@ int r600_irq_process(struct radeon_device *rdev) u32 rptr; u32 src_id, src_data; u32 ring_index; - unsigned long flags; bool queue_hotplug = false; bool queue_hdmi = false;
@@ -3385,24 +3383,24 @@ int r600_irq_process(struct radeon_device *rdev) RREG32(IH_RB_WPTR);
wptr = r600_get_ih_wptr(rdev); - rptr = rdev->ih.rptr; - DRM_DEBUG("r600_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
- spin_lock_irqsave(&rdev->ih.lock, flags); +restart_ih: + /* is somebody else already processing irqs? */ + if (atomic_xchg(&rdev->ih.lock, 1)) + return IRQ_NONE;
- if (rptr == wptr) { - spin_unlock_irqrestore(&rdev->ih.lock, flags); + rptr = rdev->ih.rptr; + if (rptr == wptr) return IRQ_NONE; - }
-restart_ih: + DRM_DEBUG("r600_irq_process start: rptr %d, wptr %d\n", rptr, wptr); + /* Order reading of wptr vs. reading of IH ring data */ rmb();
/* display interrupts */ r600_irq_ack(rdev);
- rdev->ih.wptr = wptr; while (rptr != wptr) { /* wptr/rptr are in bytes! */ ring_index = rptr / 4; @@ -3556,17 +3554,19 @@ restart_ih: rptr += 16; rptr &= rdev->ih.ptr_mask; } - /* make sure wptr hasn't changed while processing */ - wptr = r600_get_ih_wptr(rdev); - if (wptr != rdev->ih.wptr) - goto restart_ih; if (queue_hotplug) schedule_work(&rdev->hotplug_work); if (queue_hdmi) schedule_work(&rdev->audio_work); rdev->ih.rptr = rptr; WREG32(IH_RB_RPTR, rdev->ih.rptr); - spin_unlock_irqrestore(&rdev->ih.lock, flags); + atomic_set(&rdev->ih.lock, 0); + + /* make sure wptr hasn't changed while processing */ + wptr = r600_get_ih_wptr(rdev); + if (wptr != rptr) + goto restart_ih; + return IRQ_HANDLED; }
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 618df9a..8479096 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -733,11 +733,10 @@ struct r600_ih { struct radeon_bo *ring_obj; volatile uint32_t *ring; unsigned rptr; - unsigned wptr; unsigned ring_size; uint64_t gpu_addr; uint32_t ptr_mask; - spinlock_t lock; + atomic_t lock; bool enabled; };
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 7667184..3c563d1 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -731,8 +731,7 @@ int radeon_device_init(struct radeon_device *rdev, radeon_mutex_init(&rdev->cs_mutex); mutex_init(&rdev->ring_lock); mutex_init(&rdev->dc_hw_i2c_mutex); - if (rdev->family >= CHIP_R600) - spin_lock_init(&rdev->ih.lock); + atomic_set(&rdev->ih.lock, 0); mutex_init(&rdev->gem.mutex); mutex_init(&rdev->pm.mutex); init_rwsem(&rdev->pm.mclk_lock); diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index 5ca8ef5..f969487 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -3095,7 +3095,6 @@ static void si_disable_interrupts(struct radeon_device *rdev) WREG32(IH_RB_RPTR, 0); WREG32(IH_RB_WPTR, 0); rdev->ih.enabled = false; - rdev->ih.wptr = 0; rdev->ih.rptr = 0; }
@@ -3512,29 +3511,30 @@ int si_irq_process(struct radeon_device *rdev) u32 rptr; u32 src_id, src_data, ring_id; u32 ring_index; - unsigned long flags; bool queue_hotplug = false;
if (!rdev->ih.enabled || rdev->shutdown) return IRQ_NONE;
wptr = si_get_ih_wptr(rdev); + +restart_ih: + /* is somebody else already processing irqs? */ + if (atomic_xchg(&rdev->ih.lock, 1)) + return IRQ_NONE; + rptr = rdev->ih.rptr; + if (rptr == wptr) + return IRQ_NONE; + DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
- spin_lock_irqsave(&rdev->ih.lock, flags); - if (rptr == wptr) { - spin_unlock_irqrestore(&rdev->ih.lock, flags); - return IRQ_NONE; - } -restart_ih: /* Order reading of wptr vs. reading of IH ring data */ rmb();
/* display interrupts */ si_irq_ack(rdev);
- rdev->ih.wptr = wptr; while (rptr != wptr) { /* wptr/rptr are in bytes! */ ring_index = rptr / 4; @@ -3785,15 +3785,17 @@ restart_ih: rptr += 16; rptr &= rdev->ih.ptr_mask; } - /* make sure wptr hasn't changed while processing */ - wptr = si_get_ih_wptr(rdev); - if (wptr != rdev->ih.wptr) - goto restart_ih; if (queue_hotplug) schedule_work(&rdev->hotplug_work); rdev->ih.rptr = rptr; WREG32(IH_RB_RPTR, rdev->ih.rptr); - spin_unlock_irqrestore(&rdev->ih.lock, flags); + atomic_set(&rdev->ih.lock, 0); + + /* make sure wptr hasn't changed while processing */ + wptr = si_get_ih_wptr(rdev); + if (wptr != rptr) + goto restart_ih; + return IRQ_HANDLED; }
On Thu, May 24, 2012 at 09:49:10AM +0200, Christian König wrote:
From: Christian Koenig christian.koenig@amd.com
The spinlock was actually there to protect the rptr, but rptr was read outside of the locked area.
Also we don't really need a spinlock here, an atomic should to quite fine since we only need to prevent it from being reentrant.
Signed-off-by: Christian Koenig christian.koenig@amd.com
Reviewed-by: Jerome Glisse jglisse@redhat.com
drivers/gpu/drm/radeon/evergreen.c | 29 ++++++++++++++++------------- drivers/gpu/drm/radeon/r600.c | 30 +++++++++++++++--------------- drivers/gpu/drm/radeon/radeon.h | 3 +-- drivers/gpu/drm/radeon/radeon_device.c | 3 +-- drivers/gpu/drm/radeon/si.c | 30 ++++++++++++++++-------------- 5 files changed, 49 insertions(+), 46 deletions(-)
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index dd3cea4..bfcb39e 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -2943,7 +2943,6 @@ int evergreen_irq_process(struct radeon_device *rdev) u32 rptr; u32 src_id, src_data; u32 ring_index;
- unsigned long flags; bool queue_hotplug = false; bool queue_hdmi = false;
@@ -2951,22 +2950,24 @@ int evergreen_irq_process(struct radeon_device *rdev) return IRQ_NONE;
wptr = evergreen_get_ih_wptr(rdev);
+restart_ih:
- /* is somebody else already processing irqs? */
- if (atomic_xchg(&rdev->ih.lock, 1))
return IRQ_NONE;
- rptr = rdev->ih.rptr;
- if (rptr == wptr)
return IRQ_NONE;
- DRM_DEBUG("r600_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
- spin_lock_irqsave(&rdev->ih.lock, flags);
- if (rptr == wptr) {
spin_unlock_irqrestore(&rdev->ih.lock, flags);
return IRQ_NONE;
- }
-restart_ih: /* Order reading of wptr vs. reading of IH ring data */ rmb();
/* display interrupts */ evergreen_irq_ack(rdev);
- rdev->ih.wptr = wptr; while (rptr != wptr) { /* wptr/rptr are in bytes! */ ring_index = rptr / 4;
@@ -3265,17 +3266,19 @@ restart_ih: rptr += 16; rptr &= rdev->ih.ptr_mask; }
- /* make sure wptr hasn't changed while processing */
- wptr = evergreen_get_ih_wptr(rdev);
- if (wptr != rdev->ih.wptr)
if (queue_hotplug) schedule_work(&rdev->hotplug_work); if (queue_hdmi) schedule_work(&rdev->audio_work); rdev->ih.rptr = rptr; WREG32(IH_RB_RPTR, rdev->ih.rptr);goto restart_ih;
- spin_unlock_irqrestore(&rdev->ih.lock, flags);
- atomic_set(&rdev->ih.lock, 0);
- /* make sure wptr hasn't changed while processing */
- wptr = evergreen_get_ih_wptr(rdev);
- if (wptr != rptr)
goto restart_ih;
- return IRQ_HANDLED;
}
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index a8d8c44..eadbb06 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -2921,7 +2921,6 @@ void r600_disable_interrupts(struct radeon_device *rdev) WREG32(IH_RB_RPTR, 0); WREG32(IH_RB_WPTR, 0); rdev->ih.enabled = false;
- rdev->ih.wptr = 0; rdev->ih.rptr = 0;
}
@@ -3373,7 +3372,6 @@ int r600_irq_process(struct radeon_device *rdev) u32 rptr; u32 src_id, src_data; u32 ring_index;
- unsigned long flags; bool queue_hotplug = false; bool queue_hdmi = false;
@@ -3385,24 +3383,24 @@ int r600_irq_process(struct radeon_device *rdev) RREG32(IH_RB_WPTR);
wptr = r600_get_ih_wptr(rdev);
rptr = rdev->ih.rptr;
DRM_DEBUG("r600_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
spin_lock_irqsave(&rdev->ih.lock, flags);
+restart_ih:
- /* is somebody else already processing irqs? */
- if (atomic_xchg(&rdev->ih.lock, 1))
return IRQ_NONE;
- if (rptr == wptr) {
spin_unlock_irqrestore(&rdev->ih.lock, flags);
- rptr = rdev->ih.rptr;
- if (rptr == wptr) return IRQ_NONE;
- }
-restart_ih:
DRM_DEBUG("r600_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
/* Order reading of wptr vs. reading of IH ring data */ rmb();
/* display interrupts */ r600_irq_ack(rdev);
- rdev->ih.wptr = wptr; while (rptr != wptr) { /* wptr/rptr are in bytes! */ ring_index = rptr / 4;
@@ -3556,17 +3554,19 @@ restart_ih: rptr += 16; rptr &= rdev->ih.ptr_mask; }
- /* make sure wptr hasn't changed while processing */
- wptr = r600_get_ih_wptr(rdev);
- if (wptr != rdev->ih.wptr)
if (queue_hotplug) schedule_work(&rdev->hotplug_work); if (queue_hdmi) schedule_work(&rdev->audio_work); rdev->ih.rptr = rptr; WREG32(IH_RB_RPTR, rdev->ih.rptr);goto restart_ih;
- spin_unlock_irqrestore(&rdev->ih.lock, flags);
- atomic_set(&rdev->ih.lock, 0);
- /* make sure wptr hasn't changed while processing */
- wptr = r600_get_ih_wptr(rdev);
- if (wptr != rptr)
goto restart_ih;
- return IRQ_HANDLED;
}
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 618df9a..8479096 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -733,11 +733,10 @@ struct r600_ih { struct radeon_bo *ring_obj; volatile uint32_t *ring; unsigned rptr;
- unsigned wptr; unsigned ring_size; uint64_t gpu_addr; uint32_t ptr_mask;
- spinlock_t lock;
- atomic_t lock; bool enabled;
};
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 7667184..3c563d1 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -731,8 +731,7 @@ int radeon_device_init(struct radeon_device *rdev, radeon_mutex_init(&rdev->cs_mutex); mutex_init(&rdev->ring_lock); mutex_init(&rdev->dc_hw_i2c_mutex);
- if (rdev->family >= CHIP_R600)
spin_lock_init(&rdev->ih.lock);
- atomic_set(&rdev->ih.lock, 0); mutex_init(&rdev->gem.mutex); mutex_init(&rdev->pm.mutex); init_rwsem(&rdev->pm.mclk_lock);
diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index 5ca8ef5..f969487 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -3095,7 +3095,6 @@ static void si_disable_interrupts(struct radeon_device *rdev) WREG32(IH_RB_RPTR, 0); WREG32(IH_RB_WPTR, 0); rdev->ih.enabled = false;
- rdev->ih.wptr = 0; rdev->ih.rptr = 0;
}
@@ -3512,29 +3511,30 @@ int si_irq_process(struct radeon_device *rdev) u32 rptr; u32 src_id, src_data, ring_id; u32 ring_index;
unsigned long flags; bool queue_hotplug = false;
if (!rdev->ih.enabled || rdev->shutdown) return IRQ_NONE;
wptr = si_get_ih_wptr(rdev);
+restart_ih:
- /* is somebody else already processing irqs? */
- if (atomic_xchg(&rdev->ih.lock, 1))
return IRQ_NONE;
- rptr = rdev->ih.rptr;
- if (rptr == wptr)
return IRQ_NONE;
- DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
- spin_lock_irqsave(&rdev->ih.lock, flags);
- if (rptr == wptr) {
spin_unlock_irqrestore(&rdev->ih.lock, flags);
return IRQ_NONE;
- }
-restart_ih: /* Order reading of wptr vs. reading of IH ring data */ rmb();
/* display interrupts */ si_irq_ack(rdev);
- rdev->ih.wptr = wptr; while (rptr != wptr) { /* wptr/rptr are in bytes! */ ring_index = rptr / 4;
@@ -3785,15 +3785,17 @@ restart_ih: rptr += 16; rptr &= rdev->ih.ptr_mask; }
- /* make sure wptr hasn't changed while processing */
- wptr = si_get_ih_wptr(rdev);
- if (wptr != rdev->ih.wptr)
if (queue_hotplug) schedule_work(&rdev->hotplug_work); rdev->ih.rptr = rptr; WREG32(IH_RB_RPTR, rdev->ih.rptr);goto restart_ih;
- spin_unlock_irqrestore(&rdev->ih.lock, flags);
- atomic_set(&rdev->ih.lock, 0);
- /* make sure wptr hasn't changed while processing */
- wptr = si_get_ih_wptr(rdev);
- if (wptr != rptr)
goto restart_ih;
- return IRQ_HANDLED;
}
-- 1.7.9.5
From: Christian Koenig christian.koenig@amd.com
1. It is really dangerous to have more than one spinlock protecting the same information.
2. radeon_irq_set sometimes wasn't called with lock protection, so it can happen that more than one CPU would tamper with the irq regs at the same time.
3. The pm.gui_idle variable was assuming that the 3D engine wasn't becoming idle between testing the register and setting the variable. So just remove it and test the register directly.
Signed-off-by: Christian Koenig christian.koenig@amd.com --- drivers/gpu/drm/radeon/evergreen.c | 1 - drivers/gpu/drm/radeon/r100.c | 1 - drivers/gpu/drm/radeon/r600.c | 1 - drivers/gpu/drm/radeon/r600_hdmi.c | 6 +-- drivers/gpu/drm/radeon/radeon.h | 33 +++++++------- drivers/gpu/drm/radeon/radeon_irq_kms.c | 72 +++++++++++++++++++++++++------ drivers/gpu/drm/radeon/radeon_kms.c | 12 ++++-- drivers/gpu/drm/radeon/radeon_pm.c | 12 +----- drivers/gpu/drm/radeon/rs600.c | 1 - drivers/gpu/drm/radeon/si.c | 1 - 10 files changed, 90 insertions(+), 50 deletions(-)
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index bfcb39e..9e9b3bb 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -3254,7 +3254,6 @@ restart_ih: break; case 233: /* GUI IDLE */ DRM_DEBUG("IH: GUI idle\n"); - rdev->pm.gui_idle = true; wake_up(&rdev->irq.idle_queue); break; default: diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index 415b7d8..2587426 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -782,7 +782,6 @@ int r100_irq_process(struct radeon_device *rdev) /* gui idle interrupt */ if (status & RADEON_GUI_IDLE_STAT) { rdev->irq.gui_idle_acked = true; - rdev->pm.gui_idle = true; wake_up(&rdev->irq.idle_queue); } /* Vertical blank interrupts */ diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index eadbb06..90c6639 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -3542,7 +3542,6 @@ restart_ih: break; case 233: /* GUI IDLE */ DRM_DEBUG("IH: GUI idle\n"); - rdev->pm.gui_idle = true; wake_up(&rdev->irq.idle_queue); break; default: diff --git a/drivers/gpu/drm/radeon/r600_hdmi.c b/drivers/gpu/drm/radeon/r600_hdmi.c index 226379e..b76c0f2 100644 --- a/drivers/gpu/drm/radeon/r600_hdmi.c +++ b/drivers/gpu/drm/radeon/r600_hdmi.c @@ -523,8 +523,7 @@ void r600_hdmi_enable(struct drm_encoder *encoder)
if (rdev->irq.installed) { /* if irq is available use it */ - rdev->irq.afmt[dig->afmt->id] = true; - radeon_irq_set(rdev); + radeon_irq_kms_enable_afmt(rdev, dig->afmt->id); }
dig->afmt->enabled = true; @@ -560,8 +559,7 @@ void r600_hdmi_disable(struct drm_encoder *encoder) offset, radeon_encoder->encoder_id);
/* disable irq */ - rdev->irq.afmt[dig->afmt->id] = false; - radeon_irq_set(rdev); + radeon_irq_kms_disable_afmt(rdev, dig->afmt->id);
/* Older chipsets not handled by AtomBIOS */ if (rdev->family >= CHIP_R600 && !ASIC_IS_DCE3(rdev)) { diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 8479096..23552b4 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -610,21 +610,20 @@ union radeon_irq_stat_regs { #define RADEON_MAX_AFMT_BLOCKS 6
struct radeon_irq { - bool installed; - bool sw_int[RADEON_NUM_RINGS]; - bool crtc_vblank_int[RADEON_MAX_CRTCS]; - bool pflip[RADEON_MAX_CRTCS]; - wait_queue_head_t vblank_queue; - bool hpd[RADEON_MAX_HPD_PINS]; - bool gui_idle; - bool gui_idle_acked; - wait_queue_head_t idle_queue; - bool afmt[RADEON_MAX_AFMT_BLOCKS]; - spinlock_t sw_lock; - int sw_refcount[RADEON_NUM_RINGS]; - union radeon_irq_stat_regs stat_regs; - spinlock_t pflip_lock[RADEON_MAX_CRTCS]; - int pflip_refcount[RADEON_MAX_CRTCS]; + bool installed; + spinlock_t lock; + bool sw_int[RADEON_NUM_RINGS]; + int sw_refcount[RADEON_NUM_RINGS]; + bool crtc_vblank_int[RADEON_MAX_CRTCS]; + bool pflip[RADEON_MAX_CRTCS]; + int pflip_refcount[RADEON_MAX_CRTCS]; + wait_queue_head_t vblank_queue; + bool hpd[RADEON_MAX_HPD_PINS]; + bool gui_idle; + bool gui_idle_acked; + wait_queue_head_t idle_queue; + bool afmt[RADEON_MAX_AFMT_BLOCKS]; + union radeon_irq_stat_regs stat_regs; };
int radeon_irq_kms_init(struct radeon_device *rdev); @@ -633,6 +632,9 @@ void radeon_irq_kms_sw_irq_get(struct radeon_device *rdev, int ring); void radeon_irq_kms_sw_irq_put(struct radeon_device *rdev, int ring); void radeon_irq_kms_pflip_irq_get(struct radeon_device *rdev, int crtc); void radeon_irq_kms_pflip_irq_put(struct radeon_device *rdev, int crtc); +void radeon_irq_kms_enable_afmt(struct radeon_device *rdev, int block); +void radeon_irq_kms_disable_afmt(struct radeon_device *rdev, int block); +int radeon_irq_kms_wait_gui_idle(struct radeon_device *rdev);
/* * CP & rings. @@ -1058,7 +1060,6 @@ struct radeon_pm { int active_crtc_count; int req_vblank; bool vblank_sync; - bool gui_idle; fixed20_12 max_bandwidth; fixed20_12 igp_sideport_mclk; fixed20_12 igp_system_mclk; diff --git a/drivers/gpu/drm/radeon/radeon_irq_kms.c b/drivers/gpu/drm/radeon/radeon_irq_kms.c index 5df58d1..11fc4f7 100644 --- a/drivers/gpu/drm/radeon/radeon_irq_kms.c +++ b/drivers/gpu/drm/radeon/radeon_irq_kms.c @@ -32,6 +32,8 @@ #include "radeon.h" #include "atom.h"
+#define RADEON_WAIT_IDLE_TIMEOUT 200 + irqreturn_t radeon_driver_irq_handler_kms(DRM_IRQ_ARGS) { struct drm_device *dev = (struct drm_device *) arg; @@ -62,8 +64,10 @@ static void radeon_hotplug_work_func(struct work_struct *work) void radeon_driver_irq_preinstall_kms(struct drm_device *dev) { struct radeon_device *rdev = dev->dev_private; + unsigned long irqflags; unsigned i;
+ spin_lock_irqsave(&rdev->irq.lock, irqflags); /* Disable *all* interrupts */ for (i = 0; i < RADEON_NUM_RINGS; i++) rdev->irq.sw_int[i] = false; @@ -76,6 +80,7 @@ void radeon_driver_irq_preinstall_kms(struct drm_device *dev) rdev->irq.afmt[i] = false; } radeon_irq_set(rdev); + spin_unlock_irqrestore(&rdev->irq.lock, irqflags); /* Clear bits */ radeon_irq_process(rdev); } @@ -83,23 +88,28 @@ void radeon_driver_irq_preinstall_kms(struct drm_device *dev) int radeon_driver_irq_postinstall_kms(struct drm_device *dev) { struct radeon_device *rdev = dev->dev_private; + unsigned long irqflags; unsigned i;
dev->max_vblank_count = 0x001fffff; + spin_lock_irqsave(&rdev->irq.lock, irqflags); for (i = 0; i < RADEON_NUM_RINGS; i++) rdev->irq.sw_int[i] = true; radeon_irq_set(rdev); + spin_unlock_irqrestore(&rdev->irq.lock, irqflags); return 0; }
void radeon_driver_irq_uninstall_kms(struct drm_device *dev) { struct radeon_device *rdev = dev->dev_private; + unsigned long irqflags; unsigned i;
if (rdev == NULL) { return; } + spin_lock_irqsave(&rdev->irq.lock, irqflags); /* Disable *all* interrupts */ for (i = 0; i < RADEON_NUM_RINGS; i++) rdev->irq.sw_int[i] = false; @@ -112,6 +122,7 @@ void radeon_driver_irq_uninstall_kms(struct drm_device *dev) rdev->irq.afmt[i] = false; } radeon_irq_set(rdev); + spin_unlock_irqrestore(&rdev->irq.lock, irqflags); }
static bool radeon_msi_ok(struct radeon_device *rdev) @@ -168,15 +179,12 @@ static bool radeon_msi_ok(struct radeon_device *rdev)
int radeon_irq_kms_init(struct radeon_device *rdev) { - int i; int r = 0;
INIT_WORK(&rdev->hotplug_work, radeon_hotplug_work_func); INIT_WORK(&rdev->audio_work, r600_audio_update_hdmi);
- spin_lock_init(&rdev->irq.sw_lock); - for (i = 0; i < rdev->num_crtc; i++) - spin_lock_init(&rdev->irq.pflip_lock[i]); + spin_lock_init(&rdev->irq.lock); r = drm_vblank_init(rdev->ddev, rdev->num_crtc); if (r) { return r; @@ -217,25 +225,25 @@ void radeon_irq_kms_sw_irq_get(struct radeon_device *rdev, int ring) { unsigned long irqflags;
- spin_lock_irqsave(&rdev->irq.sw_lock, irqflags); + spin_lock_irqsave(&rdev->irq.lock, irqflags); if (rdev->ddev->irq_enabled && (++rdev->irq.sw_refcount[ring] == 1)) { rdev->irq.sw_int[ring] = true; radeon_irq_set(rdev); } - spin_unlock_irqrestore(&rdev->irq.sw_lock, irqflags); + spin_unlock_irqrestore(&rdev->irq.lock, irqflags); }
void radeon_irq_kms_sw_irq_put(struct radeon_device *rdev, int ring) { unsigned long irqflags;
- spin_lock_irqsave(&rdev->irq.sw_lock, irqflags); + spin_lock_irqsave(&rdev->irq.lock, irqflags); BUG_ON(rdev->ddev->irq_enabled && rdev->irq.sw_refcount[ring] <= 0); if (rdev->ddev->irq_enabled && (--rdev->irq.sw_refcount[ring] == 0)) { rdev->irq.sw_int[ring] = false; radeon_irq_set(rdev); } - spin_unlock_irqrestore(&rdev->irq.sw_lock, irqflags); + spin_unlock_irqrestore(&rdev->irq.lock, irqflags); }
void radeon_irq_kms_pflip_irq_get(struct radeon_device *rdev, int crtc) @@ -245,12 +253,12 @@ void radeon_irq_kms_pflip_irq_get(struct radeon_device *rdev, int crtc) if (crtc < 0 || crtc >= rdev->num_crtc) return;
- spin_lock_irqsave(&rdev->irq.pflip_lock[crtc], irqflags); + spin_lock_irqsave(&rdev->irq.lock, irqflags); if (rdev->ddev->irq_enabled && (++rdev->irq.pflip_refcount[crtc] == 1)) { rdev->irq.pflip[crtc] = true; radeon_irq_set(rdev); } - spin_unlock_irqrestore(&rdev->irq.pflip_lock[crtc], irqflags); + spin_unlock_irqrestore(&rdev->irq.lock, irqflags); }
void radeon_irq_kms_pflip_irq_put(struct radeon_device *rdev, int crtc) @@ -260,12 +268,52 @@ void radeon_irq_kms_pflip_irq_put(struct radeon_device *rdev, int crtc) if (crtc < 0 || crtc >= rdev->num_crtc) return;
- spin_lock_irqsave(&rdev->irq.pflip_lock[crtc], irqflags); + spin_lock_irqsave(&rdev->irq.lock, irqflags); BUG_ON(rdev->ddev->irq_enabled && rdev->irq.pflip_refcount[crtc] <= 0); if (rdev->ddev->irq_enabled && (--rdev->irq.pflip_refcount[crtc] == 0)) { rdev->irq.pflip[crtc] = false; radeon_irq_set(rdev); } - spin_unlock_irqrestore(&rdev->irq.pflip_lock[crtc], irqflags); + spin_unlock_irqrestore(&rdev->irq.lock, irqflags); +} + +void radeon_irq_kms_enable_afmt(struct radeon_device *rdev, int block) +{ + unsigned long irqflags; + + spin_lock_irqsave(&rdev->irq.lock, irqflags); + rdev->irq.afmt[block] = true; + radeon_irq_set(rdev); + spin_unlock_irqrestore(&rdev->irq.lock, irqflags); + +} + +void radeon_irq_kms_disable_afmt(struct radeon_device *rdev, int block) +{ + unsigned long irqflags; + + spin_lock_irqsave(&rdev->irq.lock, irqflags); + rdev->irq.afmt[block] = false; + radeon_irq_set(rdev); + spin_unlock_irqrestore(&rdev->irq.lock, irqflags); }
+int radeon_irq_kms_wait_gui_idle(struct radeon_device *rdev) +{ + unsigned long irqflags; + int r; + + spin_lock_irqsave(&rdev->irq.lock, irqflags); + rdev->irq.gui_idle = true; + radeon_irq_set(rdev); + spin_unlock_irqrestore(&rdev->irq.lock, irqflags); + + r = wait_event_timeout(rdev->irq.idle_queue, radeon_gui_idle(rdev), + msecs_to_jiffies(RADEON_WAIT_IDLE_TIMEOUT)); + + spin_lock_irqsave(&rdev->irq.lock, irqflags); + rdev->irq.gui_idle = false; + radeon_irq_set(rdev); + spin_unlock_irqrestore(&rdev->irq.lock, irqflags); + return r; +} diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index f1016a5..abbb04d 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -382,29 +382,35 @@ u32 radeon_get_vblank_counter_kms(struct drm_device *dev, int crtc) int radeon_enable_vblank_kms(struct drm_device *dev, int crtc) { struct radeon_device *rdev = dev->dev_private; + unsigned long irqflags; + int r;
if (crtc < 0 || crtc >= rdev->num_crtc) { DRM_ERROR("Invalid crtc %d\n", crtc); return -EINVAL; }
+ spin_lock_irqsave(&rdev->irq.lock, irqflags); rdev->irq.crtc_vblank_int[crtc] = true; - - return radeon_irq_set(rdev); + r = radeon_irq_set(rdev); + spin_unlock_irqrestore(&rdev->irq.lock, irqflags); + return r; }
void radeon_disable_vblank_kms(struct drm_device *dev, int crtc) { struct radeon_device *rdev = dev->dev_private; + unsigned long irqflags;
if (crtc < 0 || crtc >= rdev->num_crtc) { DRM_ERROR("Invalid crtc %d\n", crtc); return; }
+ spin_lock_irqsave(&rdev->irq.lock, irqflags); rdev->irq.crtc_vblank_int[crtc] = false; - radeon_irq_set(rdev); + spin_unlock_irqrestore(&rdev->irq.lock, irqflags); }
int radeon_get_vblank_timestamp_kms(struct drm_device *dev, int crtc, diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c index d13b6ae..79642cd 100644 --- a/drivers/gpu/drm/radeon/radeon_pm.c +++ b/drivers/gpu/drm/radeon/radeon_pm.c @@ -34,7 +34,6 @@ #define RADEON_IDLE_LOOP_MS 100 #define RADEON_RECLOCK_DELAY_MS 200 #define RADEON_WAIT_VBLANK_TIMEOUT 200 -#define RADEON_WAIT_IDLE_TIMEOUT 200
static const char *radeon_pm_state_type_name[5] = { "Default", @@ -257,15 +256,8 @@ static void radeon_pm_set_clocks(struct radeon_device *rdev) /* gui idle int has issues on older chips it seems */ if (rdev->family >= CHIP_R600) { if (rdev->irq.installed) { - /* wait for GPU idle */ - rdev->pm.gui_idle = false; - rdev->irq.gui_idle = true; - radeon_irq_set(rdev); - wait_event_interruptible_timeout( - rdev->irq.idle_queue, rdev->pm.gui_idle, - msecs_to_jiffies(RADEON_WAIT_IDLE_TIMEOUT)); - rdev->irq.gui_idle = false; - radeon_irq_set(rdev); + /* wait for GPU to become idle */ + radeon_irq_kms_wait_gui_idle(rdev); } } else { struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c index 25f9eef..4e9c41a 100644 --- a/drivers/gpu/drm/radeon/rs600.c +++ b/drivers/gpu/drm/radeon/rs600.c @@ -686,7 +686,6 @@ int rs600_irq_process(struct radeon_device *rdev) /* GUI idle */ if (G_000040_GUI_IDLE(status)) { rdev->irq.gui_idle_acked = true; - rdev->pm.gui_idle = true; wake_up(&rdev->irq.idle_queue); } /* Vertical blank interrupts */ diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index f969487..23be691 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -3773,7 +3773,6 @@ restart_ih: break; case 233: /* GUI IDLE */ DRM_DEBUG("IH: GUI idle\n"); - rdev->pm.gui_idle = true; wake_up(&rdev->irq.idle_queue); break; default:
On Thu, May 24, 2012 at 3:49 AM, Christian König deathsimple@vodafone.de wrote:
From: Christian Koenig christian.koenig@amd.com
- It is really dangerous to have more than one
spinlock protecting the same information.
- radeon_irq_set sometimes wasn't called with lock
protection, so it can happen that more than one CPU would tamper with the irq regs at the same time.
- The pm.gui_idle variable was assuming that the 3D
engine wasn't becoming idle between testing the register and setting the variable. So just remove it and test the register directly.
Signed-off-by: Christian Koenig christian.koenig@amd.com
drivers/gpu/drm/radeon/evergreen.c | 1 - drivers/gpu/drm/radeon/r100.c | 1 - drivers/gpu/drm/radeon/r600.c | 1 - drivers/gpu/drm/radeon/r600_hdmi.c | 6 +-- drivers/gpu/drm/radeon/radeon.h | 33 +++++++------- drivers/gpu/drm/radeon/radeon_irq_kms.c | 72 +++++++++++++++++++++++++------ drivers/gpu/drm/radeon/radeon_kms.c | 12 ++++-- drivers/gpu/drm/radeon/radeon_pm.c | 12 +----- drivers/gpu/drm/radeon/rs600.c | 1 - drivers/gpu/drm/radeon/si.c | 1 - 10 files changed, 90 insertions(+), 50 deletions(-)
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index bfcb39e..9e9b3bb 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -3254,7 +3254,6 @@ restart_ih: break; case 233: /* GUI IDLE */ DRM_DEBUG("IH: GUI idle\n");
- rdev->pm.gui_idle = true;
wake_up(&rdev->irq.idle_queue); break; default: diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index 415b7d8..2587426 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -782,7 +782,6 @@ int r100_irq_process(struct radeon_device *rdev) /* gui idle interrupt */ if (status & RADEON_GUI_IDLE_STAT) { rdev->irq.gui_idle_acked = true;
- rdev->pm.gui_idle = true;
wake_up(&rdev->irq.idle_queue); } /* Vertical blank interrupts */ diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index eadbb06..90c6639 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -3542,7 +3542,6 @@ restart_ih: break; case 233: /* GUI IDLE */ DRM_DEBUG("IH: GUI idle\n");
- rdev->pm.gui_idle = true;
wake_up(&rdev->irq.idle_queue); break; default: diff --git a/drivers/gpu/drm/radeon/r600_hdmi.c b/drivers/gpu/drm/radeon/r600_hdmi.c index 226379e..b76c0f2 100644 --- a/drivers/gpu/drm/radeon/r600_hdmi.c +++ b/drivers/gpu/drm/radeon/r600_hdmi.c @@ -523,8 +523,7 @@ void r600_hdmi_enable(struct drm_encoder *encoder)
if (rdev->irq.installed) { /* if irq is available use it */
- rdev->irq.afmt[dig->afmt->id] = true;
- radeon_irq_set(rdev);
- radeon_irq_kms_enable_afmt(rdev, dig->afmt->id);
}
dig->afmt->enabled = true; @@ -560,8 +559,7 @@ void r600_hdmi_disable(struct drm_encoder *encoder) offset, radeon_encoder->encoder_id);
/* disable irq */
- rdev->irq.afmt[dig->afmt->id] = false;
- radeon_irq_set(rdev);
- radeon_irq_kms_disable_afmt(rdev, dig->afmt->id);
/* Older chipsets not handled by AtomBIOS */ if (rdev->family >= CHIP_R600 && !ASIC_IS_DCE3(rdev)) { diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 8479096..23552b4 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -610,21 +610,20 @@ union radeon_irq_stat_regs { #define RADEON_MAX_AFMT_BLOCKS 6
struct radeon_irq {
- bool installed;
- bool sw_int[RADEON_NUM_RINGS];
- bool crtc_vblank_int[RADEON_MAX_CRTCS];
- bool pflip[RADEON_MAX_CRTCS];
- wait_queue_head_t vblank_queue;
- bool hpd[RADEON_MAX_HPD_PINS];
- bool gui_idle;
- bool gui_idle_acked;
- wait_queue_head_t idle_queue;
- bool afmt[RADEON_MAX_AFMT_BLOCKS];
- spinlock_t sw_lock;
- int sw_refcount[RADEON_NUM_RINGS];
- union radeon_irq_stat_regs stat_regs;
- spinlock_t pflip_lock[RADEON_MAX_CRTCS];
- int pflip_refcount[RADEON_MAX_CRTCS];
- bool installed;
- spinlock_t lock;
- bool sw_int[RADEON_NUM_RINGS];
- int sw_refcount[RADEON_NUM_RINGS];
- bool crtc_vblank_int[RADEON_MAX_CRTCS];
- bool pflip[RADEON_MAX_CRTCS];
- int pflip_refcount[RADEON_MAX_CRTCS];
- wait_queue_head_t vblank_queue;
- bool hpd[RADEON_MAX_HPD_PINS];
- bool gui_idle;
- bool gui_idle_acked;
- wait_queue_head_t idle_queue;
- bool afmt[RADEON_MAX_AFMT_BLOCKS];
- union radeon_irq_stat_regs stat_regs;
};
int radeon_irq_kms_init(struct radeon_device *rdev); @@ -633,6 +632,9 @@ void radeon_irq_kms_sw_irq_get(struct radeon_device *rdev, int ring); void radeon_irq_kms_sw_irq_put(struct radeon_device *rdev, int ring); void radeon_irq_kms_pflip_irq_get(struct radeon_device *rdev, int crtc); void radeon_irq_kms_pflip_irq_put(struct radeon_device *rdev, int crtc); +void radeon_irq_kms_enable_afmt(struct radeon_device *rdev, int block); +void radeon_irq_kms_disable_afmt(struct radeon_device *rdev, int block); +int radeon_irq_kms_wait_gui_idle(struct radeon_device *rdev);
/* * CP & rings. @@ -1058,7 +1060,6 @@ struct radeon_pm { int active_crtc_count; int req_vblank; bool vblank_sync;
- bool gui_idle;
fixed20_12 max_bandwidth; fixed20_12 igp_sideport_mclk; fixed20_12 igp_system_mclk; diff --git a/drivers/gpu/drm/radeon/radeon_irq_kms.c b/drivers/gpu/drm/radeon/radeon_irq_kms.c index 5df58d1..11fc4f7 100644 --- a/drivers/gpu/drm/radeon/radeon_irq_kms.c +++ b/drivers/gpu/drm/radeon/radeon_irq_kms.c @@ -32,6 +32,8 @@ #include "radeon.h" #include "atom.h"
+#define RADEON_WAIT_IDLE_TIMEOUT 200
irqreturn_t radeon_driver_irq_handler_kms(DRM_IRQ_ARGS) { struct drm_device *dev = (struct drm_device *) arg; @@ -62,8 +64,10 @@ static void radeon_hotplug_work_func(struct work_struct *work) void radeon_driver_irq_preinstall_kms(struct drm_device *dev) { struct radeon_device *rdev = dev->dev_private;
- unsigned long irqflags;
unsigned i;
- spin_lock_irqsave(&rdev->irq.lock, irqflags);
/* Disable *all* interrupts */ for (i = 0; i < RADEON_NUM_RINGS; i++) rdev->irq.sw_int[i] = false; @@ -76,6 +80,7 @@ void radeon_driver_irq_preinstall_kms(struct drm_device *dev) rdev->irq.afmt[i] = false; } radeon_irq_set(rdev);
- spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
/* Clear bits */ radeon_irq_process(rdev); } @@ -83,23 +88,28 @@ void radeon_driver_irq_preinstall_kms(struct drm_device *dev) int radeon_driver_irq_postinstall_kms(struct drm_device *dev) { struct radeon_device *rdev = dev->dev_private;
- unsigned long irqflags;
unsigned i;
dev->max_vblank_count = 0x001fffff;
- spin_lock_irqsave(&rdev->irq.lock, irqflags);
for (i = 0; i < RADEON_NUM_RINGS; i++) rdev->irq.sw_int[i] = true; radeon_irq_set(rdev);
- spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
return 0; }
void radeon_driver_irq_uninstall_kms(struct drm_device *dev) { struct radeon_device *rdev = dev->dev_private;
- unsigned long irqflags;
unsigned i;
if (rdev == NULL) { return; }
- spin_lock_irqsave(&rdev->irq.lock, irqflags);
/* Disable *all* interrupts */ for (i = 0; i < RADEON_NUM_RINGS; i++) rdev->irq.sw_int[i] = false; @@ -112,6 +122,7 @@ void radeon_driver_irq_uninstall_kms(struct drm_device *dev) rdev->irq.afmt[i] = false; } radeon_irq_set(rdev);
- spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
}
static bool radeon_msi_ok(struct radeon_device *rdev) @@ -168,15 +179,12 @@ static bool radeon_msi_ok(struct radeon_device *rdev)
int radeon_irq_kms_init(struct radeon_device *rdev) {
- int i;
int r = 0;
INIT_WORK(&rdev->hotplug_work, radeon_hotplug_work_func); INIT_WORK(&rdev->audio_work, r600_audio_update_hdmi);
- spin_lock_init(&rdev->irq.sw_lock);
- for (i = 0; i < rdev->num_crtc; i++)
- spin_lock_init(&rdev->irq.pflip_lock[i]);
- spin_lock_init(&rdev->irq.lock);
r = drm_vblank_init(rdev->ddev, rdev->num_crtc); if (r) { return r; @@ -217,25 +225,25 @@ void radeon_irq_kms_sw_irq_get(struct radeon_device *rdev, int ring) { unsigned long irqflags;
- spin_lock_irqsave(&rdev->irq.sw_lock, irqflags);
- spin_lock_irqsave(&rdev->irq.lock, irqflags);
if (rdev->ddev->irq_enabled && (++rdev->irq.sw_refcount[ring] == 1)) { rdev->irq.sw_int[ring] = true; radeon_irq_set(rdev); }
- spin_unlock_irqrestore(&rdev->irq.sw_lock, irqflags);
- spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
}
void radeon_irq_kms_sw_irq_put(struct radeon_device *rdev, int ring) { unsigned long irqflags;
- spin_lock_irqsave(&rdev->irq.sw_lock, irqflags);
- spin_lock_irqsave(&rdev->irq.lock, irqflags);
BUG_ON(rdev->ddev->irq_enabled && rdev->irq.sw_refcount[ring] <= 0); if (rdev->ddev->irq_enabled && (--rdev->irq.sw_refcount[ring] == 0)) { rdev->irq.sw_int[ring] = false; radeon_irq_set(rdev); }
- spin_unlock_irqrestore(&rdev->irq.sw_lock, irqflags);
- spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
}
void radeon_irq_kms_pflip_irq_get(struct radeon_device *rdev, int crtc) @@ -245,12 +253,12 @@ void radeon_irq_kms_pflip_irq_get(struct radeon_device *rdev, int crtc) if (crtc < 0 || crtc >= rdev->num_crtc) return;
- spin_lock_irqsave(&rdev->irq.pflip_lock[crtc], irqflags);
- spin_lock_irqsave(&rdev->irq.lock, irqflags);
if (rdev->ddev->irq_enabled && (++rdev->irq.pflip_refcount[crtc] == 1)) { rdev->irq.pflip[crtc] = true; radeon_irq_set(rdev); }
- spin_unlock_irqrestore(&rdev->irq.pflip_lock[crtc], irqflags);
- spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
}
void radeon_irq_kms_pflip_irq_put(struct radeon_device *rdev, int crtc) @@ -260,12 +268,52 @@ void radeon_irq_kms_pflip_irq_put(struct radeon_device *rdev, int crtc) if (crtc < 0 || crtc >= rdev->num_crtc) return;
- spin_lock_irqsave(&rdev->irq.pflip_lock[crtc], irqflags);
- spin_lock_irqsave(&rdev->irq.lock, irqflags);
BUG_ON(rdev->ddev->irq_enabled && rdev->irq.pflip_refcount[crtc] <= 0); if (rdev->ddev->irq_enabled && (--rdev->irq.pflip_refcount[crtc] == 0)) { rdev->irq.pflip[crtc] = false; radeon_irq_set(rdev); }
- spin_unlock_irqrestore(&rdev->irq.pflip_lock[crtc], irqflags);
- spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
+}
+void radeon_irq_kms_enable_afmt(struct radeon_device *rdev, int block) +{
- unsigned long irqflags;
- spin_lock_irqsave(&rdev->irq.lock, irqflags);
- rdev->irq.afmt[block] = true;
- radeon_irq_set(rdev);
- spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
+}
+void radeon_irq_kms_disable_afmt(struct radeon_device *rdev, int block) +{
- unsigned long irqflags;
- spin_lock_irqsave(&rdev->irq.lock, irqflags);
- rdev->irq.afmt[block] = false;
- radeon_irq_set(rdev);
- spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
}
Should probably also add radeon_irq_kms_[en|dis]able_hpd() function and call replaced the calls to *_irq_set() in the *_hpd_init() and *_hpd_fini() callbacks for display hotplug.
Alex
+int radeon_irq_kms_wait_gui_idle(struct radeon_device *rdev) +{
- unsigned long irqflags;
- int r;
- spin_lock_irqsave(&rdev->irq.lock, irqflags);
- rdev->irq.gui_idle = true;
- radeon_irq_set(rdev);
- spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
- r = wait_event_timeout(rdev->irq.idle_queue, radeon_gui_idle(rdev),
- msecs_to_jiffies(RADEON_WAIT_IDLE_TIMEOUT));
- spin_lock_irqsave(&rdev->irq.lock, irqflags);
- rdev->irq.gui_idle = false;
- radeon_irq_set(rdev);
- spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
- return r;
+} diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index f1016a5..abbb04d 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -382,29 +382,35 @@ u32 radeon_get_vblank_counter_kms(struct drm_device *dev, int crtc) int radeon_enable_vblank_kms(struct drm_device *dev, int crtc) { struct radeon_device *rdev = dev->dev_private;
- unsigned long irqflags;
- int r;
if (crtc < 0 || crtc >= rdev->num_crtc) { DRM_ERROR("Invalid crtc %d\n", crtc); return -EINVAL; }
- spin_lock_irqsave(&rdev->irq.lock, irqflags);
rdev->irq.crtc_vblank_int[crtc] = true;
- return radeon_irq_set(rdev);
- r = radeon_irq_set(rdev);
- spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
- return r;
}
void radeon_disable_vblank_kms(struct drm_device *dev, int crtc) { struct radeon_device *rdev = dev->dev_private;
- unsigned long irqflags;
if (crtc < 0 || crtc >= rdev->num_crtc) { DRM_ERROR("Invalid crtc %d\n", crtc); return; }
- spin_lock_irqsave(&rdev->irq.lock, irqflags);
rdev->irq.crtc_vblank_int[crtc] = false;
radeon_irq_set(rdev);
- spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
}
int radeon_get_vblank_timestamp_kms(struct drm_device *dev, int crtc, diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c index d13b6ae..79642cd 100644 --- a/drivers/gpu/drm/radeon/radeon_pm.c +++ b/drivers/gpu/drm/radeon/radeon_pm.c @@ -34,7 +34,6 @@ #define RADEON_IDLE_LOOP_MS 100 #define RADEON_RECLOCK_DELAY_MS 200 #define RADEON_WAIT_VBLANK_TIMEOUT 200 -#define RADEON_WAIT_IDLE_TIMEOUT 200
static const char *radeon_pm_state_type_name[5] = { "Default", @@ -257,15 +256,8 @@ static void radeon_pm_set_clocks(struct radeon_device *rdev) /* gui idle int has issues on older chips it seems */ if (rdev->family >= CHIP_R600) { if (rdev->irq.installed) {
- /* wait for GPU idle */
- rdev->pm.gui_idle = false;
- rdev->irq.gui_idle = true;
- radeon_irq_set(rdev);
- wait_event_interruptible_timeout(
- rdev->irq.idle_queue, rdev->pm.gui_idle,
- msecs_to_jiffies(RADEON_WAIT_IDLE_TIMEOUT));
- rdev->irq.gui_idle = false;
- radeon_irq_set(rdev);
- /* wait for GPU to become idle */
- radeon_irq_kms_wait_gui_idle(rdev);
} } else { struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c index 25f9eef..4e9c41a 100644 --- a/drivers/gpu/drm/radeon/rs600.c +++ b/drivers/gpu/drm/radeon/rs600.c @@ -686,7 +686,6 @@ int rs600_irq_process(struct radeon_device *rdev) /* GUI idle */ if (G_000040_GUI_IDLE(status)) { rdev->irq.gui_idle_acked = true;
- rdev->pm.gui_idle = true;
wake_up(&rdev->irq.idle_queue); } /* Vertical blank interrupts */ diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index f969487..23be691 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -3773,7 +3773,6 @@ restart_ih: break; case 233: /* GUI IDLE */ DRM_DEBUG("IH: GUI idle\n");
- rdev->pm.gui_idle = true;
wake_up(&rdev->irq.idle_queue); break; default: -- 1.7.9.5
dri-devel mailing list dri-devel@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/dri-devel
On Thu, May 24, 2012 at 11:35:15AM -0400, Alex Deucher wrote:
On Thu, May 24, 2012 at 3:49 AM, Christian König deathsimple@vodafone.de wrote:
From: Christian Koenig christian.koenig@amd.com
- It is really dangerous to have more than one
spinlock protecting the same information.
- radeon_irq_set sometimes wasn't called with lock
protection, so it can happen that more than one CPU would tamper with the irq regs at the same time.
- The pm.gui_idle variable was assuming that the 3D
engine wasn't becoming idle between testing the register and setting the variable. So just remove it and test the register directly.
Signed-off-by: Christian Koenig christian.koenig@amd.com
drivers/gpu/drm/radeon/evergreen.c | 1 - drivers/gpu/drm/radeon/r100.c | 1 - drivers/gpu/drm/radeon/r600.c | 1 - drivers/gpu/drm/radeon/r600_hdmi.c | 6 +-- drivers/gpu/drm/radeon/radeon.h | 33 +++++++------- drivers/gpu/drm/radeon/radeon_irq_kms.c | 72 +++++++++++++++++++++++++------ drivers/gpu/drm/radeon/radeon_kms.c | 12 ++++-- drivers/gpu/drm/radeon/radeon_pm.c | 12 +----- drivers/gpu/drm/radeon/rs600.c | 1 - drivers/gpu/drm/radeon/si.c | 1 - 10 files changed, 90 insertions(+), 50 deletions(-)
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index bfcb39e..9e9b3bb 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -3254,7 +3254,6 @@ restart_ih: break; case 233: /* GUI IDLE */ DRM_DEBUG("IH: GUI idle\n");
- rdev->pm.gui_idle = true;
wake_up(&rdev->irq.idle_queue); break; default: diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index 415b7d8..2587426 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -782,7 +782,6 @@ int r100_irq_process(struct radeon_device *rdev) /* gui idle interrupt */ if (status & RADEON_GUI_IDLE_STAT) { rdev->irq.gui_idle_acked = true;
- rdev->pm.gui_idle = true;
wake_up(&rdev->irq.idle_queue); } /* Vertical blank interrupts */ diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index eadbb06..90c6639 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -3542,7 +3542,6 @@ restart_ih: break; case 233: /* GUI IDLE */ DRM_DEBUG("IH: GUI idle\n");
- rdev->pm.gui_idle = true;
wake_up(&rdev->irq.idle_queue); break; default: diff --git a/drivers/gpu/drm/radeon/r600_hdmi.c b/drivers/gpu/drm/radeon/r600_hdmi.c index 226379e..b76c0f2 100644 --- a/drivers/gpu/drm/radeon/r600_hdmi.c +++ b/drivers/gpu/drm/radeon/r600_hdmi.c @@ -523,8 +523,7 @@ void r600_hdmi_enable(struct drm_encoder *encoder)
if (rdev->irq.installed) { /* if irq is available use it */
- rdev->irq.afmt[dig->afmt->id] = true;
- radeon_irq_set(rdev);
- radeon_irq_kms_enable_afmt(rdev, dig->afmt->id);
}
dig->afmt->enabled = true; @@ -560,8 +559,7 @@ void r600_hdmi_disable(struct drm_encoder *encoder) offset, radeon_encoder->encoder_id);
/* disable irq */
- rdev->irq.afmt[dig->afmt->id] = false;
- radeon_irq_set(rdev);
- radeon_irq_kms_disable_afmt(rdev, dig->afmt->id);
/* Older chipsets not handled by AtomBIOS */ if (rdev->family >= CHIP_R600 && !ASIC_IS_DCE3(rdev)) { diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 8479096..23552b4 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -610,21 +610,20 @@ union radeon_irq_stat_regs { #define RADEON_MAX_AFMT_BLOCKS 6
struct radeon_irq {
- bool installed;
- bool sw_int[RADEON_NUM_RINGS];
- bool crtc_vblank_int[RADEON_MAX_CRTCS];
- bool pflip[RADEON_MAX_CRTCS];
- wait_queue_head_t vblank_queue;
- bool hpd[RADEON_MAX_HPD_PINS];
- bool gui_idle;
- bool gui_idle_acked;
- wait_queue_head_t idle_queue;
- bool afmt[RADEON_MAX_AFMT_BLOCKS];
- spinlock_t sw_lock;
- int sw_refcount[RADEON_NUM_RINGS];
- union radeon_irq_stat_regs stat_regs;
- spinlock_t pflip_lock[RADEON_MAX_CRTCS];
- int pflip_refcount[RADEON_MAX_CRTCS];
- bool installed;
- spinlock_t lock;
- bool sw_int[RADEON_NUM_RINGS];
- int sw_refcount[RADEON_NUM_RINGS];
- bool crtc_vblank_int[RADEON_MAX_CRTCS];
- bool pflip[RADEON_MAX_CRTCS];
- int pflip_refcount[RADEON_MAX_CRTCS];
- wait_queue_head_t vblank_queue;
- bool hpd[RADEON_MAX_HPD_PINS];
- bool gui_idle;
- bool gui_idle_acked;
- wait_queue_head_t idle_queue;
- bool afmt[RADEON_MAX_AFMT_BLOCKS];
- union radeon_irq_stat_regs stat_regs;
};
int radeon_irq_kms_init(struct radeon_device *rdev); @@ -633,6 +632,9 @@ void radeon_irq_kms_sw_irq_get(struct radeon_device *rdev, int ring); void radeon_irq_kms_sw_irq_put(struct radeon_device *rdev, int ring); void radeon_irq_kms_pflip_irq_get(struct radeon_device *rdev, int crtc); void radeon_irq_kms_pflip_irq_put(struct radeon_device *rdev, int crtc); +void radeon_irq_kms_enable_afmt(struct radeon_device *rdev, int block); +void radeon_irq_kms_disable_afmt(struct radeon_device *rdev, int block); +int radeon_irq_kms_wait_gui_idle(struct radeon_device *rdev);
/* * CP & rings. @@ -1058,7 +1060,6 @@ struct radeon_pm { int active_crtc_count; int req_vblank; bool vblank_sync;
- bool gui_idle;
fixed20_12 max_bandwidth; fixed20_12 igp_sideport_mclk; fixed20_12 igp_system_mclk; diff --git a/drivers/gpu/drm/radeon/radeon_irq_kms.c b/drivers/gpu/drm/radeon/radeon_irq_kms.c index 5df58d1..11fc4f7 100644 --- a/drivers/gpu/drm/radeon/radeon_irq_kms.c +++ b/drivers/gpu/drm/radeon/radeon_irq_kms.c @@ -32,6 +32,8 @@ #include "radeon.h" #include "atom.h"
+#define RADEON_WAIT_IDLE_TIMEOUT 200
irqreturn_t radeon_driver_irq_handler_kms(DRM_IRQ_ARGS) { struct drm_device *dev = (struct drm_device *) arg; @@ -62,8 +64,10 @@ static void radeon_hotplug_work_func(struct work_struct *work) void radeon_driver_irq_preinstall_kms(struct drm_device *dev) { struct radeon_device *rdev = dev->dev_private;
- unsigned long irqflags;
unsigned i;
- spin_lock_irqsave(&rdev->irq.lock, irqflags);
/* Disable *all* interrupts */ for (i = 0; i < RADEON_NUM_RINGS; i++) rdev->irq.sw_int[i] = false; @@ -76,6 +80,7 @@ void radeon_driver_irq_preinstall_kms(struct drm_device *dev) rdev->irq.afmt[i] = false; } radeon_irq_set(rdev);
- spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
/* Clear bits */ radeon_irq_process(rdev); } @@ -83,23 +88,28 @@ void radeon_driver_irq_preinstall_kms(struct drm_device *dev) int radeon_driver_irq_postinstall_kms(struct drm_device *dev) { struct radeon_device *rdev = dev->dev_private;
- unsigned long irqflags;
unsigned i;
dev->max_vblank_count = 0x001fffff;
- spin_lock_irqsave(&rdev->irq.lock, irqflags);
for (i = 0; i < RADEON_NUM_RINGS; i++) rdev->irq.sw_int[i] = true; radeon_irq_set(rdev);
- spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
return 0; }
void radeon_driver_irq_uninstall_kms(struct drm_device *dev) { struct radeon_device *rdev = dev->dev_private;
- unsigned long irqflags;
unsigned i;
if (rdev == NULL) { return; }
- spin_lock_irqsave(&rdev->irq.lock, irqflags);
/* Disable *all* interrupts */ for (i = 0; i < RADEON_NUM_RINGS; i++) rdev->irq.sw_int[i] = false; @@ -112,6 +122,7 @@ void radeon_driver_irq_uninstall_kms(struct drm_device *dev) rdev->irq.afmt[i] = false; } radeon_irq_set(rdev);
- spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
}
static bool radeon_msi_ok(struct radeon_device *rdev) @@ -168,15 +179,12 @@ static bool radeon_msi_ok(struct radeon_device *rdev)
int radeon_irq_kms_init(struct radeon_device *rdev) {
- int i;
int r = 0;
INIT_WORK(&rdev->hotplug_work, radeon_hotplug_work_func); INIT_WORK(&rdev->audio_work, r600_audio_update_hdmi);
- spin_lock_init(&rdev->irq.sw_lock);
- for (i = 0; i < rdev->num_crtc; i++)
- spin_lock_init(&rdev->irq.pflip_lock[i]);
- spin_lock_init(&rdev->irq.lock);
r = drm_vblank_init(rdev->ddev, rdev->num_crtc); if (r) { return r; @@ -217,25 +225,25 @@ void radeon_irq_kms_sw_irq_get(struct radeon_device *rdev, int ring) { unsigned long irqflags;
- spin_lock_irqsave(&rdev->irq.sw_lock, irqflags);
- spin_lock_irqsave(&rdev->irq.lock, irqflags);
if (rdev->ddev->irq_enabled && (++rdev->irq.sw_refcount[ring] == 1)) { rdev->irq.sw_int[ring] = true; radeon_irq_set(rdev); }
- spin_unlock_irqrestore(&rdev->irq.sw_lock, irqflags);
- spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
}
void radeon_irq_kms_sw_irq_put(struct radeon_device *rdev, int ring) { unsigned long irqflags;
- spin_lock_irqsave(&rdev->irq.sw_lock, irqflags);
- spin_lock_irqsave(&rdev->irq.lock, irqflags);
BUG_ON(rdev->ddev->irq_enabled && rdev->irq.sw_refcount[ring] <= 0); if (rdev->ddev->irq_enabled && (--rdev->irq.sw_refcount[ring] == 0)) { rdev->irq.sw_int[ring] = false; radeon_irq_set(rdev); }
- spin_unlock_irqrestore(&rdev->irq.sw_lock, irqflags);
- spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
}
void radeon_irq_kms_pflip_irq_get(struct radeon_device *rdev, int crtc) @@ -245,12 +253,12 @@ void radeon_irq_kms_pflip_irq_get(struct radeon_device *rdev, int crtc) if (crtc < 0 || crtc >= rdev->num_crtc) return;
- spin_lock_irqsave(&rdev->irq.pflip_lock[crtc], irqflags);
- spin_lock_irqsave(&rdev->irq.lock, irqflags);
if (rdev->ddev->irq_enabled && (++rdev->irq.pflip_refcount[crtc] == 1)) { rdev->irq.pflip[crtc] = true; radeon_irq_set(rdev); }
- spin_unlock_irqrestore(&rdev->irq.pflip_lock[crtc], irqflags);
- spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
}
void radeon_irq_kms_pflip_irq_put(struct radeon_device *rdev, int crtc) @@ -260,12 +268,52 @@ void radeon_irq_kms_pflip_irq_put(struct radeon_device *rdev, int crtc) if (crtc < 0 || crtc >= rdev->num_crtc) return;
- spin_lock_irqsave(&rdev->irq.pflip_lock[crtc], irqflags);
- spin_lock_irqsave(&rdev->irq.lock, irqflags);
BUG_ON(rdev->ddev->irq_enabled && rdev->irq.pflip_refcount[crtc] <= 0); if (rdev->ddev->irq_enabled && (--rdev->irq.pflip_refcount[crtc] == 0)) { rdev->irq.pflip[crtc] = false; radeon_irq_set(rdev); }
- spin_unlock_irqrestore(&rdev->irq.pflip_lock[crtc], irqflags);
- spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
+}
+void radeon_irq_kms_enable_afmt(struct radeon_device *rdev, int block) +{
- unsigned long irqflags;
- spin_lock_irqsave(&rdev->irq.lock, irqflags);
- rdev->irq.afmt[block] = true;
- radeon_irq_set(rdev);
- spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
+}
+void radeon_irq_kms_disable_afmt(struct radeon_device *rdev, int block) +{
- unsigned long irqflags;
- spin_lock_irqsave(&rdev->irq.lock, irqflags);
- rdev->irq.afmt[block] = false;
- radeon_irq_set(rdev);
- spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
}
Should probably also add radeon_irq_kms_[en|dis]able_hpd() function and call replaced the calls to *_irq_set() in the *_hpd_init() and *_hpd_fini() callbacks for display hotplug.
Alex
Agree, otherwise: Reviewed-by: Jerome Glisse jglisse@redhat.com
Cheers, Jerome
+int radeon_irq_kms_wait_gui_idle(struct radeon_device *rdev) +{
- unsigned long irqflags;
- int r;
- spin_lock_irqsave(&rdev->irq.lock, irqflags);
- rdev->irq.gui_idle = true;
- radeon_irq_set(rdev);
- spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
- r = wait_event_timeout(rdev->irq.idle_queue, radeon_gui_idle(rdev),
- msecs_to_jiffies(RADEON_WAIT_IDLE_TIMEOUT));
- spin_lock_irqsave(&rdev->irq.lock, irqflags);
- rdev->irq.gui_idle = false;
- radeon_irq_set(rdev);
- spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
- return r;
+} diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index f1016a5..abbb04d 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -382,29 +382,35 @@ u32 radeon_get_vblank_counter_kms(struct drm_device *dev, int crtc) int radeon_enable_vblank_kms(struct drm_device *dev, int crtc) { struct radeon_device *rdev = dev->dev_private;
- unsigned long irqflags;
- int r;
if (crtc < 0 || crtc >= rdev->num_crtc) { DRM_ERROR("Invalid crtc %d\n", crtc); return -EINVAL; }
- spin_lock_irqsave(&rdev->irq.lock, irqflags);
rdev->irq.crtc_vblank_int[crtc] = true;
- return radeon_irq_set(rdev);
- r = radeon_irq_set(rdev);
- spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
- return r;
}
void radeon_disable_vblank_kms(struct drm_device *dev, int crtc) { struct radeon_device *rdev = dev->dev_private;
- unsigned long irqflags;
if (crtc < 0 || crtc >= rdev->num_crtc) { DRM_ERROR("Invalid crtc %d\n", crtc); return; }
- spin_lock_irqsave(&rdev->irq.lock, irqflags);
rdev->irq.crtc_vblank_int[crtc] = false;
radeon_irq_set(rdev);
- spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
}
int radeon_get_vblank_timestamp_kms(struct drm_device *dev, int crtc, diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c index d13b6ae..79642cd 100644 --- a/drivers/gpu/drm/radeon/radeon_pm.c +++ b/drivers/gpu/drm/radeon/radeon_pm.c @@ -34,7 +34,6 @@ #define RADEON_IDLE_LOOP_MS 100 #define RADEON_RECLOCK_DELAY_MS 200 #define RADEON_WAIT_VBLANK_TIMEOUT 200 -#define RADEON_WAIT_IDLE_TIMEOUT 200
static const char *radeon_pm_state_type_name[5] = { "Default", @@ -257,15 +256,8 @@ static void radeon_pm_set_clocks(struct radeon_device *rdev) /* gui idle int has issues on older chips it seems */ if (rdev->family >= CHIP_R600) { if (rdev->irq.installed) {
- /* wait for GPU idle */
- rdev->pm.gui_idle = false;
- rdev->irq.gui_idle = true;
- radeon_irq_set(rdev);
- wait_event_interruptible_timeout(
- rdev->irq.idle_queue, rdev->pm.gui_idle,
- msecs_to_jiffies(RADEON_WAIT_IDLE_TIMEOUT));
- rdev->irq.gui_idle = false;
- radeon_irq_set(rdev);
- /* wait for GPU to become idle */
- radeon_irq_kms_wait_gui_idle(rdev);
} } else { struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c index 25f9eef..4e9c41a 100644 --- a/drivers/gpu/drm/radeon/rs600.c +++ b/drivers/gpu/drm/radeon/rs600.c @@ -686,7 +686,6 @@ int rs600_irq_process(struct radeon_device *rdev) /* GUI idle */ if (G_000040_GUI_IDLE(status)) { rdev->irq.gui_idle_acked = true;
- rdev->pm.gui_idle = true;
wake_up(&rdev->irq.idle_queue); } /* Vertical blank interrupts */ diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index f969487..23be691 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -3773,7 +3773,6 @@ restart_ih: break; case 233: /* GUI IDLE */ DRM_DEBUG("IH: GUI idle\n");
- rdev->pm.gui_idle = true;
wake_up(&rdev->irq.idle_queue); break; default: -- 1.7.9.5
dri-devel mailing list dri-devel@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/dri-devel
On Thu, May 24, 2012 at 11:35 AM, Alex Deucher alexdeucher@gmail.com wrote:
On Thu, May 24, 2012 at 3:49 AM, Christian König deathsimple@vodafone.de wrote:
From: Christian Koenig christian.koenig@amd.com
- It is really dangerous to have more than one
spinlock protecting the same information.
- radeon_irq_set sometimes wasn't called with lock
protection, so it can happen that more than one CPU would tamper with the irq regs at the same time.
- The pm.gui_idle variable was assuming that the 3D
engine wasn't becoming idle between testing the register and setting the variable. So just remove it and test the register directly.
Signed-off-by: Christian Koenig christian.koenig@amd.com
drivers/gpu/drm/radeon/evergreen.c | 1 - drivers/gpu/drm/radeon/r100.c | 1 - drivers/gpu/drm/radeon/r600.c | 1 - drivers/gpu/drm/radeon/r600_hdmi.c | 6 +-- drivers/gpu/drm/radeon/radeon.h | 33 +++++++------- drivers/gpu/drm/radeon/radeon_irq_kms.c | 72 +++++++++++++++++++++++++------ drivers/gpu/drm/radeon/radeon_kms.c | 12 ++++-- drivers/gpu/drm/radeon/radeon_pm.c | 12 +----- drivers/gpu/drm/radeon/rs600.c | 1 - drivers/gpu/drm/radeon/si.c | 1 - 10 files changed, 90 insertions(+), 50 deletions(-)
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index bfcb39e..9e9b3bb 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -3254,7 +3254,6 @@ restart_ih: break; case 233: /* GUI IDLE */ DRM_DEBUG("IH: GUI idle\n");
- rdev->pm.gui_idle = true;
wake_up(&rdev->irq.idle_queue); break; default: diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index 415b7d8..2587426 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -782,7 +782,6 @@ int r100_irq_process(struct radeon_device *rdev) /* gui idle interrupt */ if (status & RADEON_GUI_IDLE_STAT) { rdev->irq.gui_idle_acked = true;
- rdev->pm.gui_idle = true;
wake_up(&rdev->irq.idle_queue); } /* Vertical blank interrupts */ diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index eadbb06..90c6639 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -3542,7 +3542,6 @@ restart_ih: break; case 233: /* GUI IDLE */ DRM_DEBUG("IH: GUI idle\n");
- rdev->pm.gui_idle = true;
wake_up(&rdev->irq.idle_queue); break; default: diff --git a/drivers/gpu/drm/radeon/r600_hdmi.c b/drivers/gpu/drm/radeon/r600_hdmi.c index 226379e..b76c0f2 100644 --- a/drivers/gpu/drm/radeon/r600_hdmi.c +++ b/drivers/gpu/drm/radeon/r600_hdmi.c @@ -523,8 +523,7 @@ void r600_hdmi_enable(struct drm_encoder *encoder)
if (rdev->irq.installed) { /* if irq is available use it */
- rdev->irq.afmt[dig->afmt->id] = true;
- radeon_irq_set(rdev);
- radeon_irq_kms_enable_afmt(rdev, dig->afmt->id);
}
dig->afmt->enabled = true; @@ -560,8 +559,7 @@ void r600_hdmi_disable(struct drm_encoder *encoder) offset, radeon_encoder->encoder_id);
/* disable irq */
- rdev->irq.afmt[dig->afmt->id] = false;
- radeon_irq_set(rdev);
- radeon_irq_kms_disable_afmt(rdev, dig->afmt->id);
/* Older chipsets not handled by AtomBIOS */ if (rdev->family >= CHIP_R600 && !ASIC_IS_DCE3(rdev)) { diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 8479096..23552b4 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -610,21 +610,20 @@ union radeon_irq_stat_regs { #define RADEON_MAX_AFMT_BLOCKS 6
struct radeon_irq {
- bool installed;
- bool sw_int[RADEON_NUM_RINGS];
- bool crtc_vblank_int[RADEON_MAX_CRTCS];
- bool pflip[RADEON_MAX_CRTCS];
- wait_queue_head_t vblank_queue;
- bool hpd[RADEON_MAX_HPD_PINS];
- bool gui_idle;
- bool gui_idle_acked;
- wait_queue_head_t idle_queue;
- bool afmt[RADEON_MAX_AFMT_BLOCKS];
- spinlock_t sw_lock;
- int sw_refcount[RADEON_NUM_RINGS];
- union radeon_irq_stat_regs stat_regs;
- spinlock_t pflip_lock[RADEON_MAX_CRTCS];
- int pflip_refcount[RADEON_MAX_CRTCS];
- bool installed;
- spinlock_t lock;
- bool sw_int[RADEON_NUM_RINGS];
- int sw_refcount[RADEON_NUM_RINGS];
- bool crtc_vblank_int[RADEON_MAX_CRTCS];
- bool pflip[RADEON_MAX_CRTCS];
- int pflip_refcount[RADEON_MAX_CRTCS];
- wait_queue_head_t vblank_queue;
- bool hpd[RADEON_MAX_HPD_PINS];
- bool gui_idle;
- bool gui_idle_acked;
- wait_queue_head_t idle_queue;
- bool afmt[RADEON_MAX_AFMT_BLOCKS];
- union radeon_irq_stat_regs stat_regs;
};
int radeon_irq_kms_init(struct radeon_device *rdev); @@ -633,6 +632,9 @@ void radeon_irq_kms_sw_irq_get(struct radeon_device *rdev, int ring); void radeon_irq_kms_sw_irq_put(struct radeon_device *rdev, int ring); void radeon_irq_kms_pflip_irq_get(struct radeon_device *rdev, int crtc); void radeon_irq_kms_pflip_irq_put(struct radeon_device *rdev, int crtc); +void radeon_irq_kms_enable_afmt(struct radeon_device *rdev, int block); +void radeon_irq_kms_disable_afmt(struct radeon_device *rdev, int block); +int radeon_irq_kms_wait_gui_idle(struct radeon_device *rdev);
/* * CP & rings. @@ -1058,7 +1060,6 @@ struct radeon_pm { int active_crtc_count; int req_vblank; bool vblank_sync;
- bool gui_idle;
fixed20_12 max_bandwidth; fixed20_12 igp_sideport_mclk; fixed20_12 igp_system_mclk; diff --git a/drivers/gpu/drm/radeon/radeon_irq_kms.c b/drivers/gpu/drm/radeon/radeon_irq_kms.c index 5df58d1..11fc4f7 100644 --- a/drivers/gpu/drm/radeon/radeon_irq_kms.c +++ b/drivers/gpu/drm/radeon/radeon_irq_kms.c @@ -32,6 +32,8 @@ #include "radeon.h" #include "atom.h"
+#define RADEON_WAIT_IDLE_TIMEOUT 200
irqreturn_t radeon_driver_irq_handler_kms(DRM_IRQ_ARGS) { struct drm_device *dev = (struct drm_device *) arg; @@ -62,8 +64,10 @@ static void radeon_hotplug_work_func(struct work_struct *work) void radeon_driver_irq_preinstall_kms(struct drm_device *dev) { struct radeon_device *rdev = dev->dev_private;
- unsigned long irqflags;
unsigned i;
- spin_lock_irqsave(&rdev->irq.lock, irqflags);
/* Disable *all* interrupts */ for (i = 0; i < RADEON_NUM_RINGS; i++) rdev->irq.sw_int[i] = false; @@ -76,6 +80,7 @@ void radeon_driver_irq_preinstall_kms(struct drm_device *dev) rdev->irq.afmt[i] = false; } radeon_irq_set(rdev);
- spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
/* Clear bits */ radeon_irq_process(rdev); } @@ -83,23 +88,28 @@ void radeon_driver_irq_preinstall_kms(struct drm_device *dev) int radeon_driver_irq_postinstall_kms(struct drm_device *dev) { struct radeon_device *rdev = dev->dev_private;
- unsigned long irqflags;
unsigned i;
dev->max_vblank_count = 0x001fffff;
- spin_lock_irqsave(&rdev->irq.lock, irqflags);
for (i = 0; i < RADEON_NUM_RINGS; i++) rdev->irq.sw_int[i] = true; radeon_irq_set(rdev);
- spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
return 0; }
void radeon_driver_irq_uninstall_kms(struct drm_device *dev) { struct radeon_device *rdev = dev->dev_private;
- unsigned long irqflags;
unsigned i;
if (rdev == NULL) { return; }
- spin_lock_irqsave(&rdev->irq.lock, irqflags);
/* Disable *all* interrupts */ for (i = 0; i < RADEON_NUM_RINGS; i++) rdev->irq.sw_int[i] = false; @@ -112,6 +122,7 @@ void radeon_driver_irq_uninstall_kms(struct drm_device *dev) rdev->irq.afmt[i] = false; } radeon_irq_set(rdev);
- spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
}
static bool radeon_msi_ok(struct radeon_device *rdev) @@ -168,15 +179,12 @@ static bool radeon_msi_ok(struct radeon_device *rdev)
int radeon_irq_kms_init(struct radeon_device *rdev) {
- int i;
int r = 0;
INIT_WORK(&rdev->hotplug_work, radeon_hotplug_work_func); INIT_WORK(&rdev->audio_work, r600_audio_update_hdmi);
- spin_lock_init(&rdev->irq.sw_lock);
- for (i = 0; i < rdev->num_crtc; i++)
- spin_lock_init(&rdev->irq.pflip_lock[i]);
- spin_lock_init(&rdev->irq.lock);
r = drm_vblank_init(rdev->ddev, rdev->num_crtc); if (r) { return r; @@ -217,25 +225,25 @@ void radeon_irq_kms_sw_irq_get(struct radeon_device *rdev, int ring) { unsigned long irqflags;
- spin_lock_irqsave(&rdev->irq.sw_lock, irqflags);
- spin_lock_irqsave(&rdev->irq.lock, irqflags);
if (rdev->ddev->irq_enabled && (++rdev->irq.sw_refcount[ring] == 1)) { rdev->irq.sw_int[ring] = true; radeon_irq_set(rdev); }
- spin_unlock_irqrestore(&rdev->irq.sw_lock, irqflags);
- spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
}
void radeon_irq_kms_sw_irq_put(struct radeon_device *rdev, int ring) { unsigned long irqflags;
- spin_lock_irqsave(&rdev->irq.sw_lock, irqflags);
- spin_lock_irqsave(&rdev->irq.lock, irqflags);
BUG_ON(rdev->ddev->irq_enabled && rdev->irq.sw_refcount[ring] <= 0); if (rdev->ddev->irq_enabled && (--rdev->irq.sw_refcount[ring] == 0)) { rdev->irq.sw_int[ring] = false; radeon_irq_set(rdev); }
- spin_unlock_irqrestore(&rdev->irq.sw_lock, irqflags);
- spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
}
void radeon_irq_kms_pflip_irq_get(struct radeon_device *rdev, int crtc) @@ -245,12 +253,12 @@ void radeon_irq_kms_pflip_irq_get(struct radeon_device *rdev, int crtc) if (crtc < 0 || crtc >= rdev->num_crtc) return;
- spin_lock_irqsave(&rdev->irq.pflip_lock[crtc], irqflags);
- spin_lock_irqsave(&rdev->irq.lock, irqflags);
if (rdev->ddev->irq_enabled && (++rdev->irq.pflip_refcount[crtc] == 1)) { rdev->irq.pflip[crtc] = true; radeon_irq_set(rdev); }
- spin_unlock_irqrestore(&rdev->irq.pflip_lock[crtc], irqflags);
- spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
}
void radeon_irq_kms_pflip_irq_put(struct radeon_device *rdev, int crtc) @@ -260,12 +268,52 @@ void radeon_irq_kms_pflip_irq_put(struct radeon_device *rdev, int crtc) if (crtc < 0 || crtc >= rdev->num_crtc) return;
- spin_lock_irqsave(&rdev->irq.pflip_lock[crtc], irqflags);
- spin_lock_irqsave(&rdev->irq.lock, irqflags);
BUG_ON(rdev->ddev->irq_enabled && rdev->irq.pflip_refcount[crtc] <= 0); if (rdev->ddev->irq_enabled && (--rdev->irq.pflip_refcount[crtc] == 0)) { rdev->irq.pflip[crtc] = false; radeon_irq_set(rdev); }
- spin_unlock_irqrestore(&rdev->irq.pflip_lock[crtc], irqflags);
- spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
+}
+void radeon_irq_kms_enable_afmt(struct radeon_device *rdev, int block) +{
- unsigned long irqflags;
- spin_lock_irqsave(&rdev->irq.lock, irqflags);
- rdev->irq.afmt[block] = true;
- radeon_irq_set(rdev);
- spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
+}
+void radeon_irq_kms_disable_afmt(struct radeon_device *rdev, int block) +{
- unsigned long irqflags;
- spin_lock_irqsave(&rdev->irq.lock, irqflags);
- rdev->irq.afmt[block] = false;
- radeon_irq_set(rdev);
- spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
}
Should probably also add radeon_irq_kms_[en|dis]able_hpd() function and call replaced the calls to *_irq_set() in the *_hpd_init() and *_hpd_fini() callbacks for display hotplug.
See attached follow on patch.
Alex
On Thu, May 31, 2012 at 2:15 PM, Alex Deucher alexdeucher@gmail.com wrote:
On Thu, May 24, 2012 at 11:35 AM, Alex Deucher alexdeucher@gmail.com wrote:
On Thu, May 24, 2012 at 3:49 AM, Christian König deathsimple@vodafone.de wrote:
From: Christian Koenig christian.koenig@amd.com
- It is really dangerous to have more than one
spinlock protecting the same information.
- radeon_irq_set sometimes wasn't called with lock
protection, so it can happen that more than one CPU would tamper with the irq regs at the same time.
- The pm.gui_idle variable was assuming that the 3D
engine wasn't becoming idle between testing the register and setting the variable. So just remove it and test the register directly.
Signed-off-by: Christian Koenig christian.koenig@amd.com
drivers/gpu/drm/radeon/evergreen.c | 1 - drivers/gpu/drm/radeon/r100.c | 1 - drivers/gpu/drm/radeon/r600.c | 1 - drivers/gpu/drm/radeon/r600_hdmi.c | 6 +-- drivers/gpu/drm/radeon/radeon.h | 33 +++++++------- drivers/gpu/drm/radeon/radeon_irq_kms.c | 72 +++++++++++++++++++++++++------ drivers/gpu/drm/radeon/radeon_kms.c | 12 ++++-- drivers/gpu/drm/radeon/radeon_pm.c | 12 +----- drivers/gpu/drm/radeon/rs600.c | 1 - drivers/gpu/drm/radeon/si.c | 1 - 10 files changed, 90 insertions(+), 50 deletions(-)
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index bfcb39e..9e9b3bb 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -3254,7 +3254,6 @@ restart_ih: break; case 233: /* GUI IDLE */ DRM_DEBUG("IH: GUI idle\n");
- rdev->pm.gui_idle = true;
wake_up(&rdev->irq.idle_queue); break; default: diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index 415b7d8..2587426 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -782,7 +782,6 @@ int r100_irq_process(struct radeon_device *rdev) /* gui idle interrupt */ if (status & RADEON_GUI_IDLE_STAT) { rdev->irq.gui_idle_acked = true;
- rdev->pm.gui_idle = true;
wake_up(&rdev->irq.idle_queue); } /* Vertical blank interrupts */ diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index eadbb06..90c6639 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -3542,7 +3542,6 @@ restart_ih: break; case 233: /* GUI IDLE */ DRM_DEBUG("IH: GUI idle\n");
- rdev->pm.gui_idle = true;
wake_up(&rdev->irq.idle_queue); break; default: diff --git a/drivers/gpu/drm/radeon/r600_hdmi.c b/drivers/gpu/drm/radeon/r600_hdmi.c index 226379e..b76c0f2 100644 --- a/drivers/gpu/drm/radeon/r600_hdmi.c +++ b/drivers/gpu/drm/radeon/r600_hdmi.c @@ -523,8 +523,7 @@ void r600_hdmi_enable(struct drm_encoder *encoder)
if (rdev->irq.installed) { /* if irq is available use it */
- rdev->irq.afmt[dig->afmt->id] = true;
- radeon_irq_set(rdev);
- radeon_irq_kms_enable_afmt(rdev, dig->afmt->id);
}
dig->afmt->enabled = true; @@ -560,8 +559,7 @@ void r600_hdmi_disable(struct drm_encoder *encoder) offset, radeon_encoder->encoder_id);
/* disable irq */
- rdev->irq.afmt[dig->afmt->id] = false;
- radeon_irq_set(rdev);
- radeon_irq_kms_disable_afmt(rdev, dig->afmt->id);
/* Older chipsets not handled by AtomBIOS */ if (rdev->family >= CHIP_R600 && !ASIC_IS_DCE3(rdev)) { diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 8479096..23552b4 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -610,21 +610,20 @@ union radeon_irq_stat_regs { #define RADEON_MAX_AFMT_BLOCKS 6
struct radeon_irq {
- bool installed;
- bool sw_int[RADEON_NUM_RINGS];
- bool crtc_vblank_int[RADEON_MAX_CRTCS];
- bool pflip[RADEON_MAX_CRTCS];
- wait_queue_head_t vblank_queue;
- bool hpd[RADEON_MAX_HPD_PINS];
- bool gui_idle;
- bool gui_idle_acked;
- wait_queue_head_t idle_queue;
- bool afmt[RADEON_MAX_AFMT_BLOCKS];
- spinlock_t sw_lock;
- int sw_refcount[RADEON_NUM_RINGS];
- union radeon_irq_stat_regs stat_regs;
- spinlock_t pflip_lock[RADEON_MAX_CRTCS];
- int pflip_refcount[RADEON_MAX_CRTCS];
- bool installed;
- spinlock_t lock;
- bool sw_int[RADEON_NUM_RINGS];
- int sw_refcount[RADEON_NUM_RINGS];
- bool crtc_vblank_int[RADEON_MAX_CRTCS];
- bool pflip[RADEON_MAX_CRTCS];
- int pflip_refcount[RADEON_MAX_CRTCS];
- wait_queue_head_t vblank_queue;
- bool hpd[RADEON_MAX_HPD_PINS];
- bool gui_idle;
- bool gui_idle_acked;
- wait_queue_head_t idle_queue;
- bool afmt[RADEON_MAX_AFMT_BLOCKS];
- union radeon_irq_stat_regs stat_regs;
};
int radeon_irq_kms_init(struct radeon_device *rdev); @@ -633,6 +632,9 @@ void radeon_irq_kms_sw_irq_get(struct radeon_device *rdev, int ring); void radeon_irq_kms_sw_irq_put(struct radeon_device *rdev, int ring); void radeon_irq_kms_pflip_irq_get(struct radeon_device *rdev, int crtc); void radeon_irq_kms_pflip_irq_put(struct radeon_device *rdev, int crtc); +void radeon_irq_kms_enable_afmt(struct radeon_device *rdev, int block); +void radeon_irq_kms_disable_afmt(struct radeon_device *rdev, int block); +int radeon_irq_kms_wait_gui_idle(struct radeon_device *rdev);
/* * CP & rings. @@ -1058,7 +1060,6 @@ struct radeon_pm { int active_crtc_count; int req_vblank; bool vblank_sync;
- bool gui_idle;
fixed20_12 max_bandwidth; fixed20_12 igp_sideport_mclk; fixed20_12 igp_system_mclk; diff --git a/drivers/gpu/drm/radeon/radeon_irq_kms.c b/drivers/gpu/drm/radeon/radeon_irq_kms.c index 5df58d1..11fc4f7 100644 --- a/drivers/gpu/drm/radeon/radeon_irq_kms.c +++ b/drivers/gpu/drm/radeon/radeon_irq_kms.c @@ -32,6 +32,8 @@ #include "radeon.h" #include "atom.h"
+#define RADEON_WAIT_IDLE_TIMEOUT 200
irqreturn_t radeon_driver_irq_handler_kms(DRM_IRQ_ARGS) { struct drm_device *dev = (struct drm_device *) arg; @@ -62,8 +64,10 @@ static void radeon_hotplug_work_func(struct work_struct *work) void radeon_driver_irq_preinstall_kms(struct drm_device *dev) { struct radeon_device *rdev = dev->dev_private;
- unsigned long irqflags;
unsigned i;
- spin_lock_irqsave(&rdev->irq.lock, irqflags);
/* Disable *all* interrupts */ for (i = 0; i < RADEON_NUM_RINGS; i++) rdev->irq.sw_int[i] = false; @@ -76,6 +80,7 @@ void radeon_driver_irq_preinstall_kms(struct drm_device *dev) rdev->irq.afmt[i] = false; } radeon_irq_set(rdev);
- spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
/* Clear bits */ radeon_irq_process(rdev); } @@ -83,23 +88,28 @@ void radeon_driver_irq_preinstall_kms(struct drm_device *dev) int radeon_driver_irq_postinstall_kms(struct drm_device *dev) { struct radeon_device *rdev = dev->dev_private;
- unsigned long irqflags;
unsigned i;
dev->max_vblank_count = 0x001fffff;
- spin_lock_irqsave(&rdev->irq.lock, irqflags);
for (i = 0; i < RADEON_NUM_RINGS; i++) rdev->irq.sw_int[i] = true; radeon_irq_set(rdev);
- spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
return 0; }
void radeon_driver_irq_uninstall_kms(struct drm_device *dev) { struct radeon_device *rdev = dev->dev_private;
- unsigned long irqflags;
unsigned i;
if (rdev == NULL) { return; }
- spin_lock_irqsave(&rdev->irq.lock, irqflags);
/* Disable *all* interrupts */ for (i = 0; i < RADEON_NUM_RINGS; i++) rdev->irq.sw_int[i] = false; @@ -112,6 +122,7 @@ void radeon_driver_irq_uninstall_kms(struct drm_device *dev) rdev->irq.afmt[i] = false; } radeon_irq_set(rdev);
- spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
}
static bool radeon_msi_ok(struct radeon_device *rdev) @@ -168,15 +179,12 @@ static bool radeon_msi_ok(struct radeon_device *rdev)
int radeon_irq_kms_init(struct radeon_device *rdev) {
- int i;
int r = 0;
INIT_WORK(&rdev->hotplug_work, radeon_hotplug_work_func); INIT_WORK(&rdev->audio_work, r600_audio_update_hdmi);
- spin_lock_init(&rdev->irq.sw_lock);
- for (i = 0; i < rdev->num_crtc; i++)
- spin_lock_init(&rdev->irq.pflip_lock[i]);
- spin_lock_init(&rdev->irq.lock);
r = drm_vblank_init(rdev->ddev, rdev->num_crtc); if (r) { return r; @@ -217,25 +225,25 @@ void radeon_irq_kms_sw_irq_get(struct radeon_device *rdev, int ring) { unsigned long irqflags;
- spin_lock_irqsave(&rdev->irq.sw_lock, irqflags);
- spin_lock_irqsave(&rdev->irq.lock, irqflags);
if (rdev->ddev->irq_enabled && (++rdev->irq.sw_refcount[ring] == 1)) { rdev->irq.sw_int[ring] = true; radeon_irq_set(rdev); }
- spin_unlock_irqrestore(&rdev->irq.sw_lock, irqflags);
- spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
}
void radeon_irq_kms_sw_irq_put(struct radeon_device *rdev, int ring) { unsigned long irqflags;
- spin_lock_irqsave(&rdev->irq.sw_lock, irqflags);
- spin_lock_irqsave(&rdev->irq.lock, irqflags);
BUG_ON(rdev->ddev->irq_enabled && rdev->irq.sw_refcount[ring] <= 0); if (rdev->ddev->irq_enabled && (--rdev->irq.sw_refcount[ring] == 0)) { rdev->irq.sw_int[ring] = false; radeon_irq_set(rdev); }
- spin_unlock_irqrestore(&rdev->irq.sw_lock, irqflags);
- spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
}
void radeon_irq_kms_pflip_irq_get(struct radeon_device *rdev, int crtc) @@ -245,12 +253,12 @@ void radeon_irq_kms_pflip_irq_get(struct radeon_device *rdev, int crtc) if (crtc < 0 || crtc >= rdev->num_crtc) return;
- spin_lock_irqsave(&rdev->irq.pflip_lock[crtc], irqflags);
- spin_lock_irqsave(&rdev->irq.lock, irqflags);
if (rdev->ddev->irq_enabled && (++rdev->irq.pflip_refcount[crtc] == 1)) { rdev->irq.pflip[crtc] = true; radeon_irq_set(rdev); }
- spin_unlock_irqrestore(&rdev->irq.pflip_lock[crtc], irqflags);
- spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
}
void radeon_irq_kms_pflip_irq_put(struct radeon_device *rdev, int crtc) @@ -260,12 +268,52 @@ void radeon_irq_kms_pflip_irq_put(struct radeon_device *rdev, int crtc) if (crtc < 0 || crtc >= rdev->num_crtc) return;
- spin_lock_irqsave(&rdev->irq.pflip_lock[crtc], irqflags);
- spin_lock_irqsave(&rdev->irq.lock, irqflags);
BUG_ON(rdev->ddev->irq_enabled && rdev->irq.pflip_refcount[crtc] <= 0); if (rdev->ddev->irq_enabled && (--rdev->irq.pflip_refcount[crtc] == 0)) { rdev->irq.pflip[crtc] = false; radeon_irq_set(rdev); }
- spin_unlock_irqrestore(&rdev->irq.pflip_lock[crtc], irqflags);
- spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
+}
+void radeon_irq_kms_enable_afmt(struct radeon_device *rdev, int block) +{
- unsigned long irqflags;
- spin_lock_irqsave(&rdev->irq.lock, irqflags);
- rdev->irq.afmt[block] = true;
- radeon_irq_set(rdev);
- spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
+}
+void radeon_irq_kms_disable_afmt(struct radeon_device *rdev, int block) +{
- unsigned long irqflags;
- spin_lock_irqsave(&rdev->irq.lock, irqflags);
- rdev->irq.afmt[block] = false;
- radeon_irq_set(rdev);
- spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
}
Should probably also add radeon_irq_kms_[en|dis]able_hpd() function and call replaced the calls to *_irq_set() in the *_hpd_init() and *_hpd_fini() callbacks for display hotplug.
See attached follow on patch.
Alex
Looks good
Reviewed-by: Jerome Glisse jglisse@redhat.com
On 31.05.2012 20:15, Alex Deucher wrote:
On Thu, May 24, 2012 at 11:35 AM, Alex Deucheralexdeucher@gmail.com wrote:
On Thu, May 24, 2012 at 3:49 AM, Christian König deathsimple@vodafone.de wrote:
From: Christian Koenigchristian.koenig@amd.com
It is really dangerous to have more than one spinlock protecting the same information.
radeon_irq_set sometimes wasn't called with lock protection, so it can happen that more than one CPU would tamper with the irq regs at the same time.
The pm.gui_idle variable was assuming that the 3D engine wasn't becoming idle between testing the register and setting the variable. So just remove it and test the register directly.
Signed-off-by: Christian Koenigchristian.koenig@amd.com
drivers/gpu/drm/radeon/evergreen.c | 1 - drivers/gpu/drm/radeon/r100.c | 1 - drivers/gpu/drm/radeon/r600.c | 1 - drivers/gpu/drm/radeon/r600_hdmi.c | 6 +-- drivers/gpu/drm/radeon/radeon.h | 33 +++++++------- drivers/gpu/drm/radeon/radeon_irq_kms.c | 72 +++++++++++++++++++++++++------ drivers/gpu/drm/radeon/radeon_kms.c | 12 ++++-- drivers/gpu/drm/radeon/radeon_pm.c | 12 +----- drivers/gpu/drm/radeon/rs600.c | 1 - drivers/gpu/drm/radeon/si.c | 1 - 10 files changed, 90 insertions(+), 50 deletions(-)
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index bfcb39e..9e9b3bb 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -3254,7 +3254,6 @@ restart_ih: break; case 233: /* GUI IDLE */ DRM_DEBUG("IH: GUI idle\n");
rdev->pm.gui_idle = true; wake_up(&rdev->irq.idle_queue); break; default:
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100..c index 415b7d8..2587426 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -782,7 +782,6 @@ int r100_irq_process(struct radeon_device *rdev) /* gui idle interrupt */ if (status& RADEON_GUI_IDLE_STAT) { rdev->irq.gui_idle_acked = true;
rdev->pm.gui_idle = true; wake_up(&rdev->irq.idle_queue); } /* Vertical blank interrupts */
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600..c index eadbb06..90c6639 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -3542,7 +3542,6 @@ restart_ih: break; case 233: /* GUI IDLE */ DRM_DEBUG("IH: GUI idle\n");
rdev->pm.gui_idle = true; wake_up(&rdev->irq.idle_queue); break; default:
diff --git a/drivers/gpu/drm/radeon/r600_hdmi.c b/drivers/gpu/drm/radeon/r600_hdmi.c index 226379e..b76c0f2 100644 --- a/drivers/gpu/drm/radeon/r600_hdmi.c +++ b/drivers/gpu/drm/radeon/r600_hdmi.c @@ -523,8 +523,7 @@ void r600_hdmi_enable(struct drm_encoder *encoder)
if (rdev->irq.installed) { /* if irq is available use it */
rdev->irq.afmt[dig->afmt->id] = true;
radeon_irq_set(rdev);
radeon_irq_kms_enable_afmt(rdev, dig->afmt->id); } dig->afmt->enabled = true;
@@ -560,8 +559,7 @@ void r600_hdmi_disable(struct drm_encoder *encoder) offset, radeon_encoder->encoder_id);
/* disable irq */
rdev->irq.afmt[dig->afmt->id] = false;
radeon_irq_set(rdev);
radeon_irq_kms_disable_afmt(rdev, dig->afmt->id); /* Older chipsets not handled by AtomBIOS */ if (rdev->family>= CHIP_R600&& !ASIC_IS_DCE3(rdev)) {
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 8479096..23552b4 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -610,21 +610,20 @@ union radeon_irq_stat_regs { #define RADEON_MAX_AFMT_BLOCKS 6
struct radeon_irq {
bool installed;
bool sw_int[RADEON_NUM_RINGS];
bool crtc_vblank_int[RADEON_MAX_CRTCS];
bool pflip[RADEON_MAX_CRTCS];
wait_queue_head_t vblank_queue;
bool hpd[RADEON_MAX_HPD_PINS];
bool gui_idle;
bool gui_idle_acked;
wait_queue_head_t idle_queue;
bool afmt[RADEON_MAX_AFMT_BLOCKS];
spinlock_t sw_lock;
int sw_refcount[RADEON_NUM_RINGS];
union radeon_irq_stat_regs stat_regs;
spinlock_t pflip_lock[RADEON_MAX_CRTCS];
int pflip_refcount[RADEON_MAX_CRTCS];
bool installed;
spinlock_t lock;
bool sw_int[RADEON_NUM_RINGS];
int sw_refcount[RADEON_NUM_RINGS];
bool crtc_vblank_int[RADEON_MAX_CRTCS];
bool pflip[RADEON_MAX_CRTCS];
int pflip_refcount[RADEON_MAX_CRTCS];
wait_queue_head_t vblank_queue;
bool hpd[RADEON_MAX_HPD_PINS];
bool gui_idle;
bool gui_idle_acked;
wait_queue_head_t idle_queue;
bool afmt[RADEON_MAX_AFMT_BLOCKS];
union radeon_irq_stat_regs stat_regs;
};
int radeon_irq_kms_init(struct radeon_device *rdev);
@@ -633,6 +632,9 @@ void radeon_irq_kms_sw_irq_get(struct radeon_device *rdev, int ring); void radeon_irq_kms_sw_irq_put(struct radeon_device *rdev, int ring); void radeon_irq_kms_pflip_irq_get(struct radeon_device *rdev, int crtc); void radeon_irq_kms_pflip_irq_put(struct radeon_device *rdev, int crtc); +void radeon_irq_kms_enable_afmt(struct radeon_device *rdev, int block); +void radeon_irq_kms_disable_afmt(struct radeon_device *rdev, int block); +int radeon_irq_kms_wait_gui_idle(struct radeon_device *rdev);
/*
- CP& rings.
@@ -1058,7 +1060,6 @@ struct radeon_pm { int active_crtc_count; int req_vblank; bool vblank_sync;
bool gui_idle; fixed20_12 max_bandwidth; fixed20_12 igp_sideport_mclk; fixed20_12 igp_system_mclk;
diff --git a/drivers/gpu/drm/radeon/radeon_irq_kms.c b/drivers/gpu/drm/radeon/radeon_irq_kms.c index 5df58d1..11fc4f7 100644 --- a/drivers/gpu/drm/radeon/radeon_irq_kms.c +++ b/drivers/gpu/drm/radeon/radeon_irq_kms.c @@ -32,6 +32,8 @@ #include "radeon.h" #include "atom.h"
+#define RADEON_WAIT_IDLE_TIMEOUT 200
- irqreturn_t radeon_driver_irq_handler_kms(DRM_IRQ_ARGS) { struct drm_device *dev = (struct drm_device *) arg;
@@ -62,8 +64,10 @@ static void radeon_hotplug_work_func(struct work_struct *work) void radeon_driver_irq_preinstall_kms(struct drm_device *dev) { struct radeon_device *rdev = dev->dev_private;
unsigned long irqflags; unsigned i;
spin_lock_irqsave(&rdev->irq.lock, irqflags); /* Disable *all* interrupts */ for (i = 0; i< RADEON_NUM_RINGS; i++) rdev->irq.sw_int[i] = false;
@@ -76,6 +80,7 @@ void radeon_driver_irq_preinstall_kms(struct drm_device *dev) rdev->irq.afmt[i] = false; } radeon_irq_set(rdev);
}spin_unlock_irqrestore(&rdev->irq.lock, irqflags); /* Clear bits */ radeon_irq_process(rdev);
@@ -83,23 +88,28 @@ void radeon_driver_irq_preinstall_kms(struct drm_device *dev) int radeon_driver_irq_postinstall_kms(struct drm_device *dev) { struct radeon_device *rdev = dev->dev_private;
unsigned long irqflags; unsigned i; dev->max_vblank_count = 0x001fffff;
spin_lock_irqsave(&rdev->irq.lock, irqflags); for (i = 0; i< RADEON_NUM_RINGS; i++) rdev->irq.sw_int[i] = true; radeon_irq_set(rdev);
spin_unlock_irqrestore(&rdev->irq.lock, irqflags); return 0;
}
void radeon_driver_irq_uninstall_kms(struct drm_device *dev) { struct radeon_device *rdev = dev->dev_private;
unsigned long irqflags; unsigned i; if (rdev == NULL) { return; }
spin_lock_irqsave(&rdev->irq.lock, irqflags); /* Disable *all* interrupts */ for (i = 0; i< RADEON_NUM_RINGS; i++) rdev->irq.sw_int[i] = false;
@@ -112,6 +122,7 @@ void radeon_driver_irq_uninstall_kms(struct drm_device *dev) rdev->irq.afmt[i] = false; } radeon_irq_set(rdev);
spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
}
static bool radeon_msi_ok(struct radeon_device *rdev)
@@ -168,15 +179,12 @@ static bool radeon_msi_ok(struct radeon_device *rdev)
int radeon_irq_kms_init(struct radeon_device *rdev) {
int i; int r = 0; INIT_WORK(&rdev->hotplug_work, radeon_hotplug_work_func); INIT_WORK(&rdev->audio_work, r600_audio_update_hdmi);
spin_lock_init(&rdev->irq.sw_lock);
for (i = 0; i< rdev->num_crtc; i++)
spin_lock_init(&rdev->irq.pflip_lock[i]);
spin_lock_init(&rdev->irq.lock); r = drm_vblank_init(rdev->ddev, rdev->num_crtc); if (r) { return r;
@@ -217,25 +225,25 @@ void radeon_irq_kms_sw_irq_get(struct radeon_device *rdev, int ring) { unsigned long irqflags;
spin_lock_irqsave(&rdev->irq.sw_lock, irqflags);
spin_lock_irqsave(&rdev->irq.lock, irqflags); if (rdev->ddev->irq_enabled&& (++rdev->irq.sw_refcount[ring] == 1)) { rdev->irq.sw_int[ring] = true; radeon_irq_set(rdev); }
spin_unlock_irqrestore(&rdev->irq.sw_lock, irqflags);
spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
}
void radeon_irq_kms_sw_irq_put(struct radeon_device *rdev, int ring) { unsigned long irqflags;
spin_lock_irqsave(&rdev->irq.sw_lock, irqflags);
spin_lock_irqsave(&rdev->irq.lock, irqflags); BUG_ON(rdev->ddev->irq_enabled&& rdev->irq.sw_refcount[ring]<= 0); if (rdev->ddev->irq_enabled&& (--rdev->irq.sw_refcount[ring] == 0)) { rdev->irq.sw_int[ring] = false; radeon_irq_set(rdev); }
spin_unlock_irqrestore(&rdev->irq.sw_lock, irqflags);
spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
}
void radeon_irq_kms_pflip_irq_get(struct radeon_device *rdev, int crtc)
@@ -245,12 +253,12 @@ void radeon_irq_kms_pflip_irq_get(struct radeon_device *rdev, int crtc) if (crtc< 0 || crtc>= rdev->num_crtc) return;
spin_lock_irqsave(&rdev->irq.pflip_lock[crtc], irqflags);
spin_lock_irqsave(&rdev->irq.lock, irqflags); if (rdev->ddev->irq_enabled&& (++rdev->irq.pflip_refcount[crtc] == 1)) { rdev->irq.pflip[crtc] = true; radeon_irq_set(rdev); }
spin_unlock_irqrestore(&rdev->irq.pflip_lock[crtc], irqflags);
spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
}
void radeon_irq_kms_pflip_irq_put(struct radeon_device *rdev, int crtc)
@@ -260,12 +268,52 @@ void radeon_irq_kms_pflip_irq_put(struct radeon_device *rdev, int crtc) if (crtc< 0 || crtc>= rdev->num_crtc) return;
spin_lock_irqsave(&rdev->irq.pflip_lock[crtc], irqflags);
spin_lock_irqsave(&rdev->irq.lock, irqflags); BUG_ON(rdev->ddev->irq_enabled&& rdev->irq.pflip_refcount[crtc]<= 0); if (rdev->ddev->irq_enabled&& (--rdev->irq.pflip_refcount[crtc] == 0)) { rdev->irq.pflip[crtc] = false; radeon_irq_set(rdev); }
spin_unlock_irqrestore(&rdev->irq.pflip_lock[crtc], irqflags);
spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
+}
+void radeon_irq_kms_enable_afmt(struct radeon_device *rdev, int block) +{
unsigned long irqflags;
spin_lock_irqsave(&rdev->irq.lock, irqflags);
rdev->irq.afmt[block] = true;
radeon_irq_set(rdev);
spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
+}
+void radeon_irq_kms_disable_afmt(struct radeon_device *rdev, int block) +{
unsigned long irqflags;
spin_lock_irqsave(&rdev->irq.lock, irqflags);
rdev->irq.afmt[block] = false;
radeon_irq_set(rdev);
}spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
Should probably also add radeon_irq_kms_[en|dis]able_hpd() function and call replaced the calls to *_irq_set() in the *_hpd_init() and *_hpd_fini() callbacks for display hotplug.
See attached follow on patch.
The version I pushed into my repo includes nearly the same implementation in the v2 version of the patch. I just haven't had time to send it to the list yet.
There also is a V2 of the IH patch. After removing the spinlock (and with it the disabling of IRQs) in the interrupt handler we seem to hit a race condition in the vblank code. Actually I think we can hit the same problem when the IH is currently running on one CPU and X modifying vblank properties on another CPU, but that is really really really unlikely.
Whatever it is I modified the IH patch to keep the spinlock for now, put I think I should look into it more closely.
Anyway going to send out the patches in a minute, Christian.
From: Christian Koenig christian.koenig@amd.com
So we can skip the looking. Also renames sw_int to ring_int, cause that better matches its purpose.
Signed-off-by: Christian Koenig christian.koenig@amd.com --- drivers/gpu/drm/radeon/evergreen.c | 32 ++++++++--------- drivers/gpu/drm/radeon/r100.c | 10 +++--- drivers/gpu/drm/radeon/r600.c | 10 +++--- drivers/gpu/drm/radeon/radeon.h | 6 ++-- drivers/gpu/drm/radeon/radeon_irq_kms.c | 59 +++++++++++++++---------------- drivers/gpu/drm/radeon/rs600.c | 10 +++--- drivers/gpu/drm/radeon/si.c | 30 ++++++++-------- 7 files changed, 76 insertions(+), 81 deletions(-)
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index 9e9b3bb..ec6a8a2 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -2615,20 +2615,20 @@ int evergreen_irq_set(struct radeon_device *rdev)
if (rdev->family >= CHIP_CAYMAN) { /* enable CP interrupts on all rings */ - if (rdev->irq.sw_int[RADEON_RING_TYPE_GFX_INDEX]) { + if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) { DRM_DEBUG("evergreen_irq_set: sw int gfx\n"); cp_int_cntl |= TIME_STAMP_INT_ENABLE; } - if (rdev->irq.sw_int[CAYMAN_RING_TYPE_CP1_INDEX]) { + if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) { DRM_DEBUG("evergreen_irq_set: sw int cp1\n"); cp_int_cntl1 |= TIME_STAMP_INT_ENABLE; } - if (rdev->irq.sw_int[CAYMAN_RING_TYPE_CP2_INDEX]) { + if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) { DRM_DEBUG("evergreen_irq_set: sw int cp2\n"); cp_int_cntl2 |= TIME_STAMP_INT_ENABLE; } } else { - if (rdev->irq.sw_int[RADEON_RING_TYPE_GFX_INDEX]) { + if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) { DRM_DEBUG("evergreen_irq_set: sw int gfx\n"); cp_int_cntl |= RB_INT_ENABLE; cp_int_cntl |= TIME_STAMP_INT_ENABLE; @@ -2636,32 +2636,32 @@ int evergreen_irq_set(struct radeon_device *rdev) }
if (rdev->irq.crtc_vblank_int[0] || - rdev->irq.pflip[0]) { + atomic_read(&rdev->irq.pflip[0])) { DRM_DEBUG("evergreen_irq_set: vblank 0\n"); crtc1 |= VBLANK_INT_MASK; } if (rdev->irq.crtc_vblank_int[1] || - rdev->irq.pflip[1]) { + atomic_read(&rdev->irq.pflip[1])) { DRM_DEBUG("evergreen_irq_set: vblank 1\n"); crtc2 |= VBLANK_INT_MASK; } if (rdev->irq.crtc_vblank_int[2] || - rdev->irq.pflip[2]) { + atomic_read(&rdev->irq.pflip[2])) { DRM_DEBUG("evergreen_irq_set: vblank 2\n"); crtc3 |= VBLANK_INT_MASK; } if (rdev->irq.crtc_vblank_int[3] || - rdev->irq.pflip[3]) { + atomic_read(&rdev->irq.pflip[3])) { DRM_DEBUG("evergreen_irq_set: vblank 3\n"); crtc4 |= VBLANK_INT_MASK; } if (rdev->irq.crtc_vblank_int[4] || - rdev->irq.pflip[4]) { + atomic_read(&rdev->irq.pflip[4])) { DRM_DEBUG("evergreen_irq_set: vblank 4\n"); crtc5 |= VBLANK_INT_MASK; } if (rdev->irq.crtc_vblank_int[5] || - rdev->irq.pflip[5]) { + atomic_read(&rdev->irq.pflip[5])) { DRM_DEBUG("evergreen_irq_set: vblank 5\n"); crtc6 |= VBLANK_INT_MASK; } @@ -2984,7 +2984,7 @@ restart_ih: rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } - if (rdev->irq.pflip[0]) + if (atomic_read(&rdev->irq.pflip[0])) radeon_crtc_handle_flip(rdev, 0); rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT; DRM_DEBUG("IH: D1 vblank\n"); @@ -3010,7 +3010,7 @@ restart_ih: rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } - if (rdev->irq.pflip[1]) + if (atomic_read(&rdev->irq.pflip[1])) radeon_crtc_handle_flip(rdev, 1); rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT; DRM_DEBUG("IH: D2 vblank\n"); @@ -3036,7 +3036,7 @@ restart_ih: rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } - if (rdev->irq.pflip[2]) + if (atomic_read(&rdev->irq.pflip[2])) radeon_crtc_handle_flip(rdev, 2); rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT; DRM_DEBUG("IH: D3 vblank\n"); @@ -3062,7 +3062,7 @@ restart_ih: rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } - if (rdev->irq.pflip[3]) + if (atomic_read(&rdev->irq.pflip[3])) radeon_crtc_handle_flip(rdev, 3); rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT; DRM_DEBUG("IH: D4 vblank\n"); @@ -3088,7 +3088,7 @@ restart_ih: rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } - if (rdev->irq.pflip[4]) + if (atomic_read(&rdev->irq.pflip[4])) radeon_crtc_handle_flip(rdev, 4); rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT; DRM_DEBUG("IH: D5 vblank\n"); @@ -3114,7 +3114,7 @@ restart_ih: rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } - if (rdev->irq.pflip[5]) + if (atomic_read(&rdev->irq.pflip[5])) radeon_crtc_handle_flip(rdev, 5); rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT; DRM_DEBUG("IH: D6 vblank\n"); diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index 2587426..2837192 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -705,18 +705,18 @@ int r100_irq_set(struct radeon_device *rdev) WREG32(R_000040_GEN_INT_CNTL, 0); return -EINVAL; } - if (rdev->irq.sw_int[RADEON_RING_TYPE_GFX_INDEX]) { + if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) { tmp |= RADEON_SW_INT_ENABLE; } if (rdev->irq.gui_idle) { tmp |= RADEON_GUI_IDLE_MASK; } if (rdev->irq.crtc_vblank_int[0] || - rdev->irq.pflip[0]) { + atomic_read(&rdev->irq.pflip[0])) { tmp |= RADEON_CRTC_VBLANK_MASK; } if (rdev->irq.crtc_vblank_int[1] || - rdev->irq.pflip[1]) { + atomic_read(&rdev->irq.pflip[1])) { tmp |= RADEON_CRTC2_VBLANK_MASK; } if (rdev->irq.hpd[0]) { @@ -791,7 +791,7 @@ int r100_irq_process(struct radeon_device *rdev) rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } - if (rdev->irq.pflip[0]) + if (atomic_read(&rdev->irq.pflip[0])) radeon_crtc_handle_flip(rdev, 0); } if (status & RADEON_CRTC2_VBLANK_STAT) { @@ -800,7 +800,7 @@ int r100_irq_process(struct radeon_device *rdev) rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } - if (rdev->irq.pflip[1]) + if (atomic_read(&rdev->irq.pflip[1])) radeon_crtc_handle_flip(rdev, 1); } if (status & RADEON_FP_DETECT_STAT) { diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index 90c6639..92a4ba9 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -3105,18 +3105,18 @@ int r600_irq_set(struct radeon_device *rdev) hdmi1 = RREG32(HDMI1_AUDIO_PACKET_CONTROL) & ~HDMI0_AZ_FORMAT_WTRIG_MASK; }
- if (rdev->irq.sw_int[RADEON_RING_TYPE_GFX_INDEX]) { + if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) { DRM_DEBUG("r600_irq_set: sw int\n"); cp_int_cntl |= RB_INT_ENABLE; cp_int_cntl |= TIME_STAMP_INT_ENABLE; } if (rdev->irq.crtc_vblank_int[0] || - rdev->irq.pflip[0]) { + atomic_read(&rdev->irq.pflip[0])) { DRM_DEBUG("r600_irq_set: vblank 0\n"); mode_int |= D1MODE_VBLANK_INT_MASK; } if (rdev->irq.crtc_vblank_int[1] || - rdev->irq.pflip[1]) { + atomic_read(&rdev->irq.pflip[1])) { DRM_DEBUG("r600_irq_set: vblank 1\n"); mode_int |= D2MODE_VBLANK_INT_MASK; } @@ -3417,7 +3417,7 @@ restart_ih: rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } - if (rdev->irq.pflip[0]) + if (atomic_read(&rdev->irq.pflip[0])) radeon_crtc_handle_flip(rdev, 0); rdev->irq.stat_regs.r600.disp_int &= ~LB_D1_VBLANK_INTERRUPT; DRM_DEBUG("IH: D1 vblank\n"); @@ -3443,7 +3443,7 @@ restart_ih: rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } - if (rdev->irq.pflip[1]) + if (atomic_read(&rdev->irq.pflip[1])) radeon_crtc_handle_flip(rdev, 1); rdev->irq.stat_regs.r600.disp_int &= ~LB_D2_VBLANK_INTERRUPT; DRM_DEBUG("IH: D2 vblank\n"); diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 23552b4..60de11e 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -612,11 +612,9 @@ union radeon_irq_stat_regs { struct radeon_irq { bool installed; spinlock_t lock; - bool sw_int[RADEON_NUM_RINGS]; - int sw_refcount[RADEON_NUM_RINGS]; + atomic_t ring_int[RADEON_NUM_RINGS]; bool crtc_vblank_int[RADEON_MAX_CRTCS]; - bool pflip[RADEON_MAX_CRTCS]; - int pflip_refcount[RADEON_MAX_CRTCS]; + atomic_t pflip[RADEON_MAX_CRTCS]; wait_queue_head_t vblank_queue; bool hpd[RADEON_MAX_HPD_PINS]; bool gui_idle; diff --git a/drivers/gpu/drm/radeon/radeon_irq_kms.c b/drivers/gpu/drm/radeon/radeon_irq_kms.c index 11fc4f7..039a09c 100644 --- a/drivers/gpu/drm/radeon/radeon_irq_kms.c +++ b/drivers/gpu/drm/radeon/radeon_irq_kms.c @@ -70,13 +70,13 @@ void radeon_driver_irq_preinstall_kms(struct drm_device *dev) spin_lock_irqsave(&rdev->irq.lock, irqflags); /* Disable *all* interrupts */ for (i = 0; i < RADEON_NUM_RINGS; i++) - rdev->irq.sw_int[i] = false; + atomic_set(&rdev->irq.ring_int[i], 0); rdev->irq.gui_idle = false; for (i = 0; i < RADEON_MAX_HPD_PINS; i++) rdev->irq.hpd[i] = false; for (i = 0; i < RADEON_MAX_CRTCS; i++) { rdev->irq.crtc_vblank_int[i] = false; - rdev->irq.pflip[i] = false; + atomic_set(&rdev->irq.pflip[i], 0); rdev->irq.afmt[i] = false; } radeon_irq_set(rdev); @@ -87,16 +87,7 @@ void radeon_driver_irq_preinstall_kms(struct drm_device *dev)
int radeon_driver_irq_postinstall_kms(struct drm_device *dev) { - struct radeon_device *rdev = dev->dev_private; - unsigned long irqflags; - unsigned i; - dev->max_vblank_count = 0x001fffff; - spin_lock_irqsave(&rdev->irq.lock, irqflags); - for (i = 0; i < RADEON_NUM_RINGS; i++) - rdev->irq.sw_int[i] = true; - radeon_irq_set(rdev); - spin_unlock_irqrestore(&rdev->irq.lock, irqflags); return 0; }
@@ -112,13 +103,13 @@ void radeon_driver_irq_uninstall_kms(struct drm_device *dev) spin_lock_irqsave(&rdev->irq.lock, irqflags); /* Disable *all* interrupts */ for (i = 0; i < RADEON_NUM_RINGS; i++) - rdev->irq.sw_int[i] = false; + atomic_set(&rdev->irq.ring_int[i], 0); rdev->irq.gui_idle = false; for (i = 0; i < RADEON_MAX_HPD_PINS; i++) rdev->irq.hpd[i] = false; for (i = 0; i < RADEON_MAX_CRTCS; i++) { rdev->irq.crtc_vblank_int[i] = false; - rdev->irq.pflip[i] = false; + atomic_set(&rdev->irq.pflip[i], 0); rdev->irq.afmt[i] = false; } radeon_irq_set(rdev); @@ -225,25 +216,28 @@ void radeon_irq_kms_sw_irq_get(struct radeon_device *rdev, int ring) { unsigned long irqflags;
- spin_lock_irqsave(&rdev->irq.lock, irqflags); - if (rdev->ddev->irq_enabled && (++rdev->irq.sw_refcount[ring] == 1)) { - rdev->irq.sw_int[ring] = true; + if (!rdev->ddev->irq_enabled) + return; + + if (atomic_inc_return(&rdev->irq.ring_int[ring]) == 1) { + spin_lock_irqsave(&rdev->irq.lock, irqflags); radeon_irq_set(rdev); + spin_unlock_irqrestore(&rdev->irq.lock, irqflags); } - spin_unlock_irqrestore(&rdev->irq.lock, irqflags); }
void radeon_irq_kms_sw_irq_put(struct radeon_device *rdev, int ring) { unsigned long irqflags;
- spin_lock_irqsave(&rdev->irq.lock, irqflags); - BUG_ON(rdev->ddev->irq_enabled && rdev->irq.sw_refcount[ring] <= 0); - if (rdev->ddev->irq_enabled && (--rdev->irq.sw_refcount[ring] == 0)) { - rdev->irq.sw_int[ring] = false; + if (!rdev->ddev->irq_enabled) + return; + + if (atomic_dec_and_test(&rdev->irq.ring_int[ring])) { + spin_lock_irqsave(&rdev->irq.lock, irqflags); radeon_irq_set(rdev); + spin_unlock_irqrestore(&rdev->irq.lock, irqflags); } - spin_unlock_irqrestore(&rdev->irq.lock, irqflags); }
void radeon_irq_kms_pflip_irq_get(struct radeon_device *rdev, int crtc) @@ -253,12 +247,14 @@ void radeon_irq_kms_pflip_irq_get(struct radeon_device *rdev, int crtc) if (crtc < 0 || crtc >= rdev->num_crtc) return;
- spin_lock_irqsave(&rdev->irq.lock, irqflags); - if (rdev->ddev->irq_enabled && (++rdev->irq.pflip_refcount[crtc] == 1)) { - rdev->irq.pflip[crtc] = true; + if (!rdev->ddev->irq_enabled) + return; + + if (atomic_inc_return(&rdev->irq.pflip[crtc]) == 1) { + spin_lock_irqsave(&rdev->irq.lock, irqflags); radeon_irq_set(rdev); + spin_unlock_irqrestore(&rdev->irq.lock, irqflags); } - spin_unlock_irqrestore(&rdev->irq.lock, irqflags); }
void radeon_irq_kms_pflip_irq_put(struct radeon_device *rdev, int crtc) @@ -268,13 +264,14 @@ void radeon_irq_kms_pflip_irq_put(struct radeon_device *rdev, int crtc) if (crtc < 0 || crtc >= rdev->num_crtc) return;
- spin_lock_irqsave(&rdev->irq.lock, irqflags); - BUG_ON(rdev->ddev->irq_enabled && rdev->irq.pflip_refcount[crtc] <= 0); - if (rdev->ddev->irq_enabled && (--rdev->irq.pflip_refcount[crtc] == 0)) { - rdev->irq.pflip[crtc] = false; + if (!rdev->ddev->irq_enabled) + return; + + if (atomic_dec_and_test(&rdev->irq.pflip[crtc])) { + spin_lock_irqsave(&rdev->irq.lock, irqflags); radeon_irq_set(rdev); + spin_unlock_irqrestore(&rdev->irq.lock, irqflags); } - spin_unlock_irqrestore(&rdev->irq.lock, irqflags); }
void radeon_irq_kms_enable_afmt(struct radeon_device *rdev, int block) diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c index 4e9c41a..d1df5cf 100644 --- a/drivers/gpu/drm/radeon/rs600.c +++ b/drivers/gpu/drm/radeon/rs600.c @@ -564,18 +564,18 @@ int rs600_irq_set(struct radeon_device *rdev) WREG32(R_000040_GEN_INT_CNTL, 0); return -EINVAL; } - if (rdev->irq.sw_int[RADEON_RING_TYPE_GFX_INDEX]) { + if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) { tmp |= S_000040_SW_INT_EN(1); } if (rdev->irq.gui_idle) { tmp |= S_000040_GUI_IDLE(1); } if (rdev->irq.crtc_vblank_int[0] || - rdev->irq.pflip[0]) { + atomic_read(&rdev->irq.pflip[0])) { mode_int |= S_006540_D1MODE_VBLANK_INT_MASK(1); } if (rdev->irq.crtc_vblank_int[1] || - rdev->irq.pflip[1]) { + atomic_read(&rdev->irq.pflip[1])) { mode_int |= S_006540_D2MODE_VBLANK_INT_MASK(1); } if (rdev->irq.hpd[0]) { @@ -695,7 +695,7 @@ int rs600_irq_process(struct radeon_device *rdev) rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } - if (rdev->irq.pflip[0]) + if (atomic_read(&rdev->irq.pflip[0])) radeon_crtc_handle_flip(rdev, 0); } if (G_007EDC_LB_D2_VBLANK_INTERRUPT(rdev->irq.stat_regs.r500.disp_int)) { @@ -704,7 +704,7 @@ int rs600_irq_process(struct radeon_device *rdev) rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } - if (rdev->irq.pflip[1]) + if (atomic_read(&rdev->irq.pflip[1])) radeon_crtc_handle_flip(rdev, 1); } if (G_007EDC_DC_HOT_PLUG_DETECT1_INTERRUPT(rdev->irq.stat_regs.r500.disp_int)) { diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index 23be691..82c7241 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -3245,45 +3245,45 @@ int si_irq_set(struct radeon_device *rdev) hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
/* enable CP interrupts on all rings */ - if (rdev->irq.sw_int[RADEON_RING_TYPE_GFX_INDEX]) { + if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) { DRM_DEBUG("si_irq_set: sw int gfx\n"); cp_int_cntl |= TIME_STAMP_INT_ENABLE; } - if (rdev->irq.sw_int[CAYMAN_RING_TYPE_CP1_INDEX]) { + if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) { DRM_DEBUG("si_irq_set: sw int cp1\n"); cp_int_cntl1 |= TIME_STAMP_INT_ENABLE; } - if (rdev->irq.sw_int[CAYMAN_RING_TYPE_CP2_INDEX]) { + if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) { DRM_DEBUG("si_irq_set: sw int cp2\n"); cp_int_cntl2 |= TIME_STAMP_INT_ENABLE; } if (rdev->irq.crtc_vblank_int[0] || - rdev->irq.pflip[0]) { + atomic_read(&rdev->irq.pflip[0])) { DRM_DEBUG("si_irq_set: vblank 0\n"); crtc1 |= VBLANK_INT_MASK; } if (rdev->irq.crtc_vblank_int[1] || - rdev->irq.pflip[1]) { + atomic_read(&rdev->irq.pflip[1])) { DRM_DEBUG("si_irq_set: vblank 1\n"); crtc2 |= VBLANK_INT_MASK; } if (rdev->irq.crtc_vblank_int[2] || - rdev->irq.pflip[2]) { + atomic_read(&rdev->irq.pflip[2])) { DRM_DEBUG("si_irq_set: vblank 2\n"); crtc3 |= VBLANK_INT_MASK; } if (rdev->irq.crtc_vblank_int[3] || - rdev->irq.pflip[3]) { + atomic_read(&rdev->irq.pflip[3])) { DRM_DEBUG("si_irq_set: vblank 3\n"); crtc4 |= VBLANK_INT_MASK; } if (rdev->irq.crtc_vblank_int[4] || - rdev->irq.pflip[4]) { + atomic_read(&rdev->irq.pflip[4])) { DRM_DEBUG("si_irq_set: vblank 4\n"); crtc5 |= VBLANK_INT_MASK; } if (rdev->irq.crtc_vblank_int[5] || - rdev->irq.pflip[5]) { + atomic_read(&rdev->irq.pflip[5])) { DRM_DEBUG("si_irq_set: vblank 5\n"); crtc6 |= VBLANK_INT_MASK; } @@ -3552,7 +3552,7 @@ restart_ih: rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } - if (rdev->irq.pflip[0]) + if (atomic_read(&rdev->irq.pflip[0])) radeon_crtc_handle_flip(rdev, 0); rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT; DRM_DEBUG("IH: D1 vblank\n"); @@ -3578,7 +3578,7 @@ restart_ih: rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } - if (rdev->irq.pflip[1]) + if (atomic_read(&rdev->irq.pflip[1])) radeon_crtc_handle_flip(rdev, 1); rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT; DRM_DEBUG("IH: D2 vblank\n"); @@ -3604,7 +3604,7 @@ restart_ih: rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } - if (rdev->irq.pflip[2]) + if (atomic_read(&rdev->irq.pflip[2])) radeon_crtc_handle_flip(rdev, 2); rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT; DRM_DEBUG("IH: D3 vblank\n"); @@ -3630,7 +3630,7 @@ restart_ih: rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } - if (rdev->irq.pflip[3]) + if (atomic_read(&rdev->irq.pflip[3])) radeon_crtc_handle_flip(rdev, 3); rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT; DRM_DEBUG("IH: D4 vblank\n"); @@ -3656,7 +3656,7 @@ restart_ih: rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } - if (rdev->irq.pflip[4]) + if (atomic_read(&rdev->irq.pflip[4])) radeon_crtc_handle_flip(rdev, 4); rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT; DRM_DEBUG("IH: D5 vblank\n"); @@ -3682,7 +3682,7 @@ restart_ih: rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } - if (rdev->irq.pflip[5]) + if (atomic_read(&rdev->irq.pflip[5])) radeon_crtc_handle_flip(rdev, 5); rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT; DRM_DEBUG("IH: D6 vblank\n");
- atomic_t ring_int[RADEON_NUM_RINGS]; bool crtc_vblank_int[RADEON_MAX_CRTCS];
- bool pflip[RADEON_MAX_CRTCS];
- int pflip_refcount[RADEON_MAX_CRTCS];
- atomic_t pflip[RADEON_MAX_CRTCS];
Hi,
Does the linux API mandates atomic_t to be a 32bits word?
Regards,
-----Original Message----- From: Sylvain BERTRAND [mailto:sylware@legeek.net] Sent: Thursday, May 24, 2012 1:59 PM To: Christian König Cc: j.glisse@gmail.com; Koenig, Christian; dri-devel@lists.freedesktop.org Subject: Re: [PATCH 08/10] drm/radeon: replace pflip and sw_int counters with atomics
- atomic_t ring_int[RADEON_NUM_RINGS]; bool
crtc_vblank_int[RADEON_MAX_CRTCS];
- bool pflip[RADEON_MAX_CRTCS];
- int
pflip_refcount[RADEON_MAX_CRTCS];
- atomic_t pflip[RADEON_MAX_CRTCS];
Hi,
Does the linux API mandates atomic_t to be a 32bits word?
AFAIK it is, at least for the platforms we care about. But since this depends on the vertical refresh frequency even a 8bit counter should do fine.
On the other hand it was an accident that those patches hit the maillinglist in the first place, cause only the first four where supposed to be send out (my fault, sorry). This one isn't tested beside compiling, and I don't think it will work out of the box.
Cheers, Christian.
Does the linux API mandates atomic_t to be a 32bits word?
AFAIK it is, at least for the platforms we care about. ...
Then, the proper course of action would be to add to the linux API, sized atomic operation first, wouldn't it?
On Thu, May 24, 2012 at 1:46 PM, Sylvain BERTRAND sylware@legeek.net wrote:
Does the linux API mandates atomic_t to be a 32bits word?
AFAIK it is, at least for the platforms we care about. ...
Then, the proper course of action would be to add to the linux API, sized atomic operation first, wouldn't it?
No, atomic is fine for this, I think only sparc32 had 24-bit atomics, and if you can get sparc32 + a radeon, then you can keep both halves.
Dave.
On Thu, May 24, 2012 at 1:53 PM, Dave Airlie airlied@gmail.com wrote:
On Thu, May 24, 2012 at 1:46 PM, Sylvain BERTRAND sylware@legeek.net wrote:
Does the linux API mandates atomic_t to be a 32bits word?
AFAIK it is, at least for the platforms we care about. ...
Then, the proper course of action would be to add to the linux API, sized atomic operation first, wouldn't it?
No, atomic is fine for this, I think only sparc32 had 24-bit atomics, and if you can get sparc32 + a radeon, then you can keep both halves.
And even that is a lie now :-)
http://lwn.net/Articles/71732/
Dave.
Does the linux API mandates atomic_t to be a 32bits word?
AFAIK it is, at least for the platforms we care about. ...
Then, the proper course of action would be to add to the linux API, sized atomic operation first, wouldn't it?
No, atomic is fine for this, I think only sparc32 had 24-bit atomics, and if you can get sparc32 + a radeon, then you can keep both halves.
And even that is a lie now :-)
Ok then: atomic_t means exactly 32 bits!
Try to remove or replace the cs_mutex with a vm_mutex where it is still needed.
Signed-off-by: Christian König deathsimple@vodafone.de --- drivers/gpu/drm/radeon/radeon.h | 44 +------------------------------- drivers/gpu/drm/radeon/radeon_cs.c | 7 ++--- drivers/gpu/drm/radeon/radeon_device.c | 2 +- drivers/gpu/drm/radeon/radeon_gart.c | 16 ++++++------ drivers/gpu/drm/radeon/radeon_gem.c | 2 -- 5 files changed, 12 insertions(+), 59 deletions(-)
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 60de11e..58a2fcf 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -159,48 +159,6 @@ static inline int radeon_atrm_get_bios_chunk(uint8_t *bios, int offset, int len) #endif bool radeon_get_bios(struct radeon_device *rdev);
- -/* - * Mutex which allows recursive locking from the same process. - */ -struct radeon_mutex { - struct mutex mutex; - struct task_struct *owner; - int level; -}; - -static inline void radeon_mutex_init(struct radeon_mutex *mutex) -{ - mutex_init(&mutex->mutex); - mutex->owner = NULL; - mutex->level = 0; -} - -static inline void radeon_mutex_lock(struct radeon_mutex *mutex) -{ - if (mutex_trylock(&mutex->mutex)) { - /* The mutex was unlocked before, so it's ours now */ - mutex->owner = current; - } else if (mutex->owner != current) { - /* Another process locked the mutex, take it */ - mutex_lock(&mutex->mutex); - mutex->owner = current; - } - /* Otherwise the mutex was already locked by this process */ - - mutex->level++; -} - -static inline void radeon_mutex_unlock(struct radeon_mutex *mutex) -{ - if (--mutex->level > 0) - return; - - mutex->owner = NULL; - mutex_unlock(&mutex->mutex); -} - - /* * Dummy page */ @@ -1527,7 +1485,6 @@ struct radeon_device { struct radeon_gem gem; struct radeon_pm pm; uint32_t bios_scratch[RADEON_BIOS_NUM_SCRATCH]; - struct radeon_mutex cs_mutex; struct radeon_wb wb; struct radeon_dummy_page dummy_page; bool shutdown; @@ -1561,6 +1518,7 @@ struct radeon_device { struct radeon_debugfs debugfs[RADEON_DEBUGFS_MAX_COMPONENTS]; unsigned debugfs_count; /* virtual memory */ + struct mutex vm_mutex; struct radeon_vm_manager vm_manager; };
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index d295821..d1ead9c 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -438,6 +438,7 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev, }
mutex_lock(&vm->mutex); + mutex_lock(&rdev->vm_mutex); r = radeon_vm_bind(rdev, vm); if (r) { goto out; @@ -474,6 +475,7 @@ out: } vm->fence = radeon_fence_ref(parser->ib.fence); } + mutex_unlock(&rdev->vm_mutex); mutex_unlock(&fpriv->vm.mutex); return r; } @@ -494,9 +496,7 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) struct radeon_cs_parser parser; int r;
- radeon_mutex_lock(&rdev->cs_mutex); if (!rdev->accel_working) { - radeon_mutex_unlock(&rdev->cs_mutex); return -EBUSY; } /* initialize parser */ @@ -510,7 +510,6 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) DRM_ERROR("Failed to initialize parser !\n"); radeon_cs_parser_fini(&parser, r); r = radeon_cs_handle_lockup(rdev, r); - radeon_mutex_unlock(&rdev->cs_mutex); return r; } r = radeon_cs_parser_relocs(&parser); @@ -519,7 +518,6 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) DRM_ERROR("Failed to parse relocation %d!\n", r); radeon_cs_parser_fini(&parser, r); r = radeon_cs_handle_lockup(rdev, r); - radeon_mutex_unlock(&rdev->cs_mutex); return r; } r = radeon_cs_ib_chunk(rdev, &parser); @@ -533,7 +531,6 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) out: radeon_cs_parser_fini(&parser, r); r = radeon_cs_handle_lockup(rdev, r); - radeon_mutex_unlock(&rdev->cs_mutex); return r; }
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 3c563d1..4168012 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -728,7 +728,6 @@ int radeon_device_init(struct radeon_device *rdev,
/* mutex initialization are all done here so we * can recall function without having locking issues */ - radeon_mutex_init(&rdev->cs_mutex); mutex_init(&rdev->ring_lock); mutex_init(&rdev->dc_hw_i2c_mutex); atomic_set(&rdev->ih.lock, 0); @@ -741,6 +740,7 @@ int radeon_device_init(struct radeon_device *rdev, if (r) return r; /* initialize vm here */ + mutex_init(&rdev->vm_mutex); rdev->vm_manager.use_bitmap = 1; rdev->vm_manager.max_pfn = 1 << 20; INIT_LIST_HEAD(&rdev->vm_manager.lru_vm); diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c index 4fa5402..7226d2b 100644 --- a/drivers/gpu/drm/radeon/radeon_gart.c +++ b/drivers/gpu/drm/radeon/radeon_gart.c @@ -356,13 +356,13 @@ int radeon_vm_manager_suspend(struct radeon_device *rdev) { struct radeon_vm *vm, *tmp;
- radeon_mutex_lock(&rdev->cs_mutex); + mutex_lock(&rdev->vm_mutex); /* unbind all active vm */ list_for_each_entry_safe(vm, tmp, &rdev->vm_manager.lru_vm, list) { radeon_vm_unbind_locked(rdev, vm); } rdev->vm_manager.funcs->fini(rdev); - radeon_mutex_unlock(&rdev->cs_mutex); + mutex_unlock(&rdev->vm_mutex); return radeon_sa_bo_manager_suspend(rdev, &rdev->vm_manager.sa_manager); }
@@ -478,9 +478,9 @@ int radeon_vm_bo_add(struct radeon_device *rdev, if (last_pfn > vm->last_pfn) { /* grow va space 32M by 32M */ unsigned align = ((32 << 20) >> 12) - 1; - radeon_mutex_lock(&rdev->cs_mutex); + mutex_lock(&rdev->vm_mutex); radeon_vm_unbind_locked(rdev, vm); - radeon_mutex_unlock(&rdev->cs_mutex); + mutex_unlock(&rdev->vm_mutex); vm->last_pfn = (last_pfn + align) & ~align; } head = &vm->va; @@ -596,9 +596,9 @@ int radeon_vm_bo_rmv(struct radeon_device *rdev, return 0;
mutex_lock(&vm->mutex); - radeon_mutex_lock(&rdev->cs_mutex); + mutex_lock(&rdev->vm_mutex); radeon_vm_bo_update_pte(rdev, vm, bo, NULL); - radeon_mutex_unlock(&rdev->cs_mutex); + mutex_unlock(&rdev->vm_mutex); list_del(&bo_va->vm_list); mutex_unlock(&vm->mutex); list_del(&bo_va->bo_list); @@ -643,9 +643,9 @@ void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm)
mutex_lock(&vm->mutex);
- radeon_mutex_lock(&rdev->cs_mutex); + mutex_lock(&rdev->vm_mutex); radeon_vm_unbind_locked(rdev, vm); - radeon_mutex_unlock(&rdev->cs_mutex); + mutex_unlock(&rdev->vm_mutex);
/* remove all bo */ r = radeon_bo_reserve(rdev->ring_tmp_bo.bo, false); diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index c0130b0..527d96e 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -159,11 +159,9 @@ void radeon_gem_object_close(struct drm_gem_object *obj, static int radeon_gem_handle_lockup(struct radeon_device *rdev, int r) { if (r == -EDEADLK) { - radeon_mutex_lock(&rdev->cs_mutex); r = radeon_gpu_reset(rdev); if (!r) r = -EAGAIN; - radeon_mutex_unlock(&rdev->cs_mutex); } return r; }
From: Christian Koenig christian.koenig@amd.com
The shader preemption on cayman doesn't work correctly with multiple rings. So to be able to still make use of the compute rings we use a semaphore to make sure that only one IB can execute at the same time.
This isn't as effective as shader preemption, but also isn't as bad as putting everything on the GFX ring.
Signed-off-by: Christian Koenig christian.koenig@amd.com --- drivers/gpu/drm/radeon/ni.c | 142 ++++++++++++++++++++++++++++++++++-- drivers/gpu/drm/radeon/radeon.h | 2 + drivers/gpu/drm/radeon/radeon_cs.c | 2 +- 3 files changed, 139 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index 9d9f5ac..6a3e8a8 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -1125,13 +1125,75 @@ void cayman_fence_ring_emit(struct radeon_device *rdev, radeon_ring_write(ring, 0); }
+/* The shader preemption on cayman doesn't work + * correctly with multiple rings. So to be able to + * still make use of the compute rings we use a + * semaphore to make sure that only one IB can execute + * at the same time + */ +static void cayman_cp_ring_create_workaround(struct radeon_device *rdev) +{ + struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; + int r; + + r = radeon_semaphore_create(rdev, &rdev->cayman_ring_lock); + if (r) { + dev_err(rdev->dev, "Can't allocate " + "cayman_ring_lock (%d)!\n", r); + return; + } + + r = radeon_ring_alloc(rdev, ring, 8); + if (r) { + dev_err(rdev->dev, "Can't initialize " + "cayman_ring_lock (%d)!\n", r); + radeon_semaphore_free(rdev, &rdev->cayman_ring_lock, NULL); + return; + } + + radeon_semaphore_emit_signal(rdev, RADEON_RING_TYPE_GFX_INDEX, + rdev->cayman_ring_lock); + + radeon_ring_commit(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]); +} + +static void cayman_cp_ring_cleanup_workaround(struct radeon_device *rdev) +{ + struct radeon_fence *fence; + int r; + + r = radeon_fence_emit(rdev, &fence, RADEON_RING_TYPE_GFX_INDEX); + if (r) { + dev_err(rdev->dev, "Can't cleanup " + "cayman_ring_lock (%d)!\n", r); + return; + } + + radeon_semaphore_free(rdev, &rdev->cayman_ring_lock, fence); + radeon_fence_unref(&fence); +} + void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) { struct radeon_ring *ring = &rdev->ring[ib->ring];
+ if (ib->ring != RADEON_RING_TYPE_GFX_INDEX) { + if (rdev->cayman_ring_lock == NULL) { + cayman_cp_ring_create_workaround(rdev); + } + } else { + if (rdev->cayman_ring_lock != NULL && + !radeon_fence_count_emitted(rdev, CAYMAN_RING_TYPE_CP1_INDEX) && + !radeon_fence_count_emitted(rdev, CAYMAN_RING_TYPE_CP2_INDEX)) { + cayman_cp_ring_cleanup_workaround(rdev); + } + } + /* set to DX10/11 mode */ radeon_ring_write(ring, PACKET3(PACKET3_MODE_CONTROL, 0)); radeon_ring_write(ring, 1); + if (rdev->cayman_ring_lock) + radeon_semaphore_emit_wait(rdev, ib->ring, rdev->cayman_ring_lock); radeon_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); radeon_ring_write(ring, #ifdef __BIG_ENDIAN @@ -1140,6 +1202,8 @@ void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) (ib->gpu_addr & 0xFFFFFFFC)); radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFF); radeon_ring_write(ring, ib->length_dw | (ib->vm_id << 24)); + if (rdev->cayman_ring_lock) + radeon_semaphore_emit_signal(rdev, ib->ring, rdev->cayman_ring_lock);
/* flush read cache over gart for this vmid */ radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); @@ -1190,6 +1254,25 @@ static int cayman_cp_load_microcode(struct radeon_device *rdev) return 0; }
+static int cayman_cp_start_compute(struct radeon_device *rdev, int ridx) +{ + struct radeon_ring *ring = &rdev->ring[ridx]; + int r; + + r = radeon_ring_lock(rdev, ring, 2); + if (r) { + DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r); + return r; + } + + /* clear the compute context state */ + radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0) | 2); + radeon_ring_write(ring, 0); + + radeon_ring_unlock_commit(rdev, ring); + return 0; +} + static int cayman_cp_start(struct radeon_device *rdev) { struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; @@ -1251,7 +1334,17 @@ static int cayman_cp_start(struct radeon_device *rdev)
radeon_ring_unlock_commit(rdev, ring);
- /* XXX init other rings */ + r = cayman_cp_start_compute(rdev, RADEON_RING_TYPE_GFX_INDEX); + if (r) + return r; + + r = cayman_cp_start_compute(rdev, CAYMAN_RING_TYPE_CP1_INDEX); + if (r) + return r; + + r = cayman_cp_start_compute(rdev, CAYMAN_RING_TYPE_CP2_INDEX); + if (r) + return r;
return 0; } @@ -1377,14 +1470,24 @@ int cayman_cp_resume(struct radeon_device *rdev)
/* start the rings */ cayman_cp_start(rdev); + rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true; - rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false; - rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false; - /* this only test cp0 */ r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]); if (r) { rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false; + return r; + } + + rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true; + r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]); + if (r) { rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false; + return r; + } + + rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true; + r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]); + if (r) { rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false; return r; } @@ -1468,7 +1571,7 @@ int cayman_asic_reset(struct radeon_device *rdev)
static int cayman_startup(struct radeon_device *rdev) { - struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; + struct radeon_ring *ring; int r;
/* enable pcie gen2 link */ @@ -1556,11 +1659,27 @@ static int cayman_startup(struct radeon_device *rdev) } evergreen_irq_set(rdev);
+ ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET, CP_RB0_RPTR, CP_RB0_WPTR, 0, 0xfffff, RADEON_CP_PACKET2); if (r) return r; + + ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]; + r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET, + CP_RB1_RPTR, CP_RB1_WPTR, + 0, 0xfffff, RADEON_CP_PACKET2); + if (r) + return r; + + ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]; + r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET, + CP_RB2_RPTR, CP_RB2_WPTR, + 0, 0xfffff, RADEON_CP_PACKET2); + if (r) + return r; + r = cayman_cp_load_microcode(rdev); if (r) return r; @@ -1607,6 +1726,7 @@ int cayman_resume(struct radeon_device *rdev) int cayman_suspend(struct radeon_device *rdev) { /* FIXME: we should wait for ring to be empty */ + radeon_semaphore_free(rdev, &rdev->cayman_ring_lock, NULL); radeon_ib_pool_suspend(rdev); radeon_vm_manager_suspend(rdev); r600_blit_suspend(rdev); @@ -1626,7 +1746,7 @@ int cayman_suspend(struct radeon_device *rdev) */ int cayman_init(struct radeon_device *rdev) { - struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; + struct radeon_ring *ring; int r;
/* Read BIOS */ @@ -1675,9 +1795,18 @@ int cayman_init(struct radeon_device *rdev) if (r) return r;
+ ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; ring->ring_obj = NULL; r600_ring_init(rdev, ring, 1024 * 1024);
+ ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]; + ring->ring_obj = NULL; + r600_ring_init(rdev, ring, 16 * 1024); + + ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]; + ring->ring_obj = NULL; + r600_ring_init(rdev, ring, 16 * 1024); + rdev->ih.ring_obj = NULL; r600_ih_ring_init(rdev, 64 * 1024);
@@ -1734,6 +1863,7 @@ void cayman_fini(struct radeon_device *rdev) if (rdev->flags & RADEON_IS_IGP) si_rlc_fini(rdev); radeon_wb_fini(rdev); + radeon_semaphore_free(rdev, &rdev->cayman_ring_lock, NULL); radeon_vm_manager_fini(rdev); r100_ib_fini(rdev); radeon_irq_kms_fini(rdev); diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 58a2fcf..1516b2d 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -1520,6 +1520,8 @@ struct radeon_device { /* virtual memory */ struct mutex vm_mutex; struct radeon_vm_manager vm_manager; + /* workaround for defect in caymans compute rings */ + struct radeon_semaphore *cayman_ring_lock; };
int radeon_device_init(struct radeon_device *rdev, diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index d1ead9c..54f3ec3 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -103,7 +103,7 @@ static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority p->ring = RADEON_RING_TYPE_GFX_INDEX; break; case RADEON_CS_RING_COMPUTE: - if (p->rdev->family >= CHIP_TAHITI) { + if (p->rdev->family >= CHIP_CAYMAN) { if (p->priority > 0) p->ring = CAYMAN_RING_TYPE_CP1_INDEX; else
On Thu, May 24, 2012 at 09:49:05AM +0200, Christian König wrote:
It is completely unnecessary to create fences before they are emitted, so remove it and a bunch of checks if fences are emitted or not.
Signed-off-by: Christian König deathsimple@vodafone.de
Reviewed-by: Jerome Glisse jglisse@redhat.com
drivers/gpu/drm/radeon/evergreen.c | 2 +- drivers/gpu/drm/radeon/ni.c | 2 +- drivers/gpu/drm/radeon/r100.c | 4 +- drivers/gpu/drm/radeon/r200.c | 4 +- drivers/gpu/drm/radeon/r600.c | 4 +- drivers/gpu/drm/radeon/r600_blit_kms.c | 6 +-- drivers/gpu/drm/radeon/radeon.h | 11 +++-- drivers/gpu/drm/radeon/radeon_asic.h | 8 ++-- drivers/gpu/drm/radeon/radeon_benchmark.c | 10 +---- drivers/gpu/drm/radeon/radeon_fence.c | 42 ++++++------------ drivers/gpu/drm/radeon/radeon_ring.c | 19 +++++---- drivers/gpu/drm/radeon/radeon_sa.c | 2 +- drivers/gpu/drm/radeon/radeon_test.c | 66 ++++++++++++----------------- drivers/gpu/drm/radeon/radeon_ttm.c | 30 +++++-------- drivers/gpu/drm/radeon/si.c | 6 +-- 15 files changed, 86 insertions(+), 130 deletions(-)
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index 58991af..dd3cea4 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -1366,7 +1366,7 @@ void evergreen_mc_program(struct radeon_device *rdev) */ void evergreen_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) {
- struct radeon_ring *ring = &rdev->ring[ib->fence->ring];
struct radeon_ring *ring = &rdev->ring[ib->ring];
/* set to DX10/11 mode */ radeon_ring_write(ring, PACKET3(PACKET3_MODE_CONTROL, 0));
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index b01c2dd..9d9f5ac 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -1127,7 +1127,7 @@ void cayman_fence_ring_emit(struct radeon_device *rdev,
void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) {
- struct radeon_ring *ring = &rdev->ring[ib->fence->ring];
struct radeon_ring *ring = &rdev->ring[ib->ring];
/* set to DX10/11 mode */ radeon_ring_write(ring, PACKET3(PACKET3_MODE_CONTROL, 0));
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index fb44e7e..415b7d8 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -883,7 +883,7 @@ int r100_copy_blit(struct radeon_device *rdev, uint64_t src_offset, uint64_t dst_offset, unsigned num_gpu_pages,
struct radeon_fence *fence)
struct radeon_fence **fence)
{ struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; uint32_t cur_pages; @@ -947,7 +947,7 @@ int r100_copy_blit(struct radeon_device *rdev, RADEON_WAIT_HOST_IDLECLEAN | RADEON_WAIT_DMA_GUI_IDLE); if (fence) {
r = radeon_fence_emit(rdev, fence);
} radeon_ring_unlock_commit(rdev, ring); return r;r = radeon_fence_emit(rdev, fence, RADEON_RING_TYPE_GFX_INDEX);
diff --git a/drivers/gpu/drm/radeon/r200.c b/drivers/gpu/drm/radeon/r200.c index a26144d..f088925 100644 --- a/drivers/gpu/drm/radeon/r200.c +++ b/drivers/gpu/drm/radeon/r200.c @@ -85,7 +85,7 @@ int r200_copy_dma(struct radeon_device *rdev, uint64_t src_offset, uint64_t dst_offset, unsigned num_gpu_pages,
struct radeon_fence *fence)
struct radeon_fence **fence)
{ struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; uint32_t size; @@ -120,7 +120,7 @@ int r200_copy_dma(struct radeon_device *rdev, radeon_ring_write(ring, PACKET0(RADEON_WAIT_UNTIL, 0)); radeon_ring_write(ring, RADEON_WAIT_DMA_GUI_IDLE); if (fence) {
r = radeon_fence_emit(rdev, fence);
} radeon_ring_unlock_commit(rdev, ring); return r;r = radeon_fence_emit(rdev, fence, RADEON_RING_TYPE_GFX_INDEX);
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index f388a1d..e5279f9 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -2369,7 +2369,7 @@ int r600_copy_blit(struct radeon_device *rdev, uint64_t src_offset, uint64_t dst_offset, unsigned num_gpu_pages,
struct radeon_fence *fence)
struct radeon_fence **fence)
{ struct radeon_sa_bo *vb = NULL; int r; @@ -2670,7 +2670,7 @@ void r600_fini(struct radeon_device *rdev) */ void r600_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) {
- struct radeon_ring *ring = &rdev->ring[ib->fence->ring];
struct radeon_ring *ring = &rdev->ring[ib->ring];
/* FIXME: implement */ radeon_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
diff --git a/drivers/gpu/drm/radeon/r600_blit_kms.c b/drivers/gpu/drm/radeon/r600_blit_kms.c index 03b6e0d..02f4eeb 100644 --- a/drivers/gpu/drm/radeon/r600_blit_kms.c +++ b/drivers/gpu/drm/radeon/r600_blit_kms.c @@ -703,20 +703,20 @@ int r600_blit_prepare_copy(struct radeon_device *rdev, unsigned num_gpu_pages, return 0; }
-void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence *fence, +void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence **fence, struct radeon_sa_bo *vb) { struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; int r;
- r = radeon_fence_emit(rdev, fence);
r = radeon_fence_emit(rdev, fence, RADEON_RING_TYPE_GFX_INDEX); if (r) { radeon_ring_unlock_undo(rdev, ring); return; }
radeon_ring_unlock_commit(rdev, ring);
- radeon_sa_bo_free(rdev, &vb, fence);
- radeon_sa_bo_free(rdev, &vb, *fence);
}
void r600_kms_blit_copy(struct radeon_device *rdev, diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 1dc3a4a..5e259b4 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -113,7 +113,6 @@ extern int radeon_lockup_timeout;
/* fence seq are set to this number when signaled */ #define RADEON_FENCE_SIGNALED_SEQ 0LL -#define RADEON_FENCE_NOTEMITED_SEQ (~0LL)
/* internal ring indices */ /* r1xx+ has gfx CP ring */ @@ -277,8 +276,7 @@ struct radeon_fence { int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring); int radeon_fence_driver_init(struct radeon_device *rdev); void radeon_fence_driver_fini(struct radeon_device *rdev); -int radeon_fence_create(struct radeon_device *rdev, struct radeon_fence **fence, int ring); -int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence *fence); +int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence **fence, int ring); void radeon_fence_process(struct radeon_device *rdev, int ring); bool radeon_fence_signaled(struct radeon_fence *fence); int radeon_fence_wait(struct radeon_fence *fence, bool interruptible); @@ -627,6 +625,7 @@ struct radeon_ib { uint32_t length_dw; uint64_t gpu_addr; uint32_t *ptr;
- int ring; struct radeon_fence *fence; unsigned vm_id; bool is_const_ib;
@@ -1190,20 +1189,20 @@ struct radeon_asic { uint64_t src_offset, uint64_t dst_offset, unsigned num_gpu_pages,
struct radeon_fence *fence);
u32 blit_ring_index; int (*dma)(struct radeon_device *rdev, uint64_t src_offset, uint64_t dst_offset, unsigned num_gpu_pages,struct radeon_fence **fence);
struct radeon_fence *fence);
u32 dma_ring_index; /* method used for bo copy */ int (*copy)(struct radeon_device *rdev, uint64_t src_offset, uint64_t dst_offset, unsigned num_gpu_pages,struct radeon_fence **fence);
struct radeon_fence *fence);
/* ring used for bo copies */ u32 copy_ring_index; } copy;struct radeon_fence **fence);
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index e76a941..8cdf075 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -79,7 +79,7 @@ int r100_copy_blit(struct radeon_device *rdev, uint64_t src_offset, uint64_t dst_offset, unsigned num_gpu_pages,
struct radeon_fence *fence);
struct radeon_fence **fence);
int r100_set_surface_reg(struct radeon_device *rdev, int reg, uint32_t tiling_flags, uint32_t pitch, uint32_t offset, uint32_t obj_size); @@ -144,7 +144,7 @@ extern int r200_copy_dma(struct radeon_device *rdev, uint64_t src_offset, uint64_t dst_offset, unsigned num_gpu_pages,
struct radeon_fence *fence);
struct radeon_fence **fence);
void r200_set_safe_registers(struct radeon_device *rdev);
/* @@ -318,7 +318,7 @@ void r600_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib); int r600_ring_test(struct radeon_device *rdev, struct radeon_ring *cp); int r600_copy_blit(struct radeon_device *rdev, uint64_t src_offset, uint64_t dst_offset,
unsigned num_gpu_pages, struct radeon_fence *fence);
unsigned num_gpu_pages, struct radeon_fence **fence);
void r600_hpd_init(struct radeon_device *rdev); void r600_hpd_fini(struct radeon_device *rdev); bool r600_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd); @@ -364,7 +364,7 @@ void r600_hdmi_update_audio_settings(struct drm_encoder *encoder); /* r600 blit */ int r600_blit_prepare_copy(struct radeon_device *rdev, unsigned num_gpu_pages, struct radeon_sa_bo **vb); -void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence *fence, +void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence **fence, struct radeon_sa_bo *vb); void r600_kms_blit_copy(struct radeon_device *rdev, u64 src_gpu_addr, u64 dst_gpu_addr, diff --git a/drivers/gpu/drm/radeon/radeon_benchmark.c b/drivers/gpu/drm/radeon/radeon_benchmark.c index 364f5b1..bedda9c 100644 --- a/drivers/gpu/drm/radeon/radeon_benchmark.c +++ b/drivers/gpu/drm/radeon/radeon_benchmark.c @@ -45,20 +45,14 @@ static int radeon_benchmark_do_move(struct radeon_device *rdev, unsigned size, for (i = 0; i < n; i++) { switch (flag) { case RADEON_BENCHMARK_COPY_DMA:
r = radeon_fence_create(rdev, &fence, radeon_copy_dma_ring_index(rdev));
if (r)
return r; r = radeon_copy_dma(rdev, saddr, daddr, size / RADEON_GPU_PAGE_SIZE,
fence);
case RADEON_BENCHMARK_COPY_BLIT:&fence); break;
r = radeon_fence_create(rdev, &fence, radeon_copy_blit_ring_index(rdev));
if (r)
return r; r = radeon_copy_blit(rdev, saddr, daddr, size / RADEON_GPU_PAGE_SIZE,
fence);
default: DRM_ERROR("Unknown copy method\n");&fence); break;
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c index 11f5f40..401d346 100644 --- a/drivers/gpu/drm/radeon/radeon_fence.c +++ b/drivers/gpu/drm/radeon/radeon_fence.c @@ -61,15 +61,21 @@ static u32 radeon_fence_read(struct radeon_device *rdev, int ring) return seq; }
-int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence *fence) +int radeon_fence_emit(struct radeon_device *rdev,
struct radeon_fence **fence,
int ring)
{ /* we are protected by the ring emission mutex */
- if (fence->seq && fence->seq < RADEON_FENCE_NOTEMITED_SEQ) {
return 0;
- *fence = kmalloc(sizeof(struct radeon_fence), GFP_KERNEL);
- if ((*fence) == NULL) {
}return -ENOMEM;
- fence->seq = ++rdev->fence_drv[fence->ring].seq;
- radeon_fence_ring_emit(rdev, fence->ring, fence);
- trace_radeon_fence_emit(rdev->ddev, fence->seq);
- kref_init(&((*fence)->kref));
- (*fence)->rdev = rdev;
- (*fence)->seq = ++rdev->fence_drv[ring].seq;
- (*fence)->ring = ring;
- radeon_fence_ring_emit(rdev, ring, *fence);
- trace_radeon_fence_emit(rdev->ddev, (*fence)->seq); return 0;
}
@@ -138,25 +144,9 @@ static void radeon_fence_destroy(struct kref *kref) struct radeon_fence *fence;
fence = container_of(kref, struct radeon_fence, kref);
- fence->seq = RADEON_FENCE_NOTEMITED_SEQ; kfree(fence);
}
-int radeon_fence_create(struct radeon_device *rdev,
struct radeon_fence **fence,
int ring)
-{
- *fence = kmalloc(sizeof(struct radeon_fence), GFP_KERNEL);
- if ((*fence) == NULL) {
return -ENOMEM;
- }
- kref_init(&((*fence)->kref));
- (*fence)->rdev = rdev;
- (*fence)->seq = RADEON_FENCE_NOTEMITED_SEQ;
- (*fence)->ring = ring;
- return 0;
-}
static bool radeon_fence_seq_signaled(struct radeon_device *rdev, u64 seq, unsigned ring) { @@ -176,10 +166,6 @@ bool radeon_fence_signaled(struct radeon_fence *fence) if (!fence) { return true; }
- if (fence->seq == RADEON_FENCE_NOTEMITED_SEQ) {
WARN(1, "Querying an unemitted fence : %p !\n", fence);
return true;
- } if (fence->seq == RADEON_FENCE_SIGNALED_SEQ) { return true; }
@@ -444,9 +430,7 @@ int radeon_fence_wait_any(struct radeon_device *rdev, return 0; }
if (fences[i]->seq < RADEON_FENCE_NOTEMITED_SEQ) {
seq[i] = fences[i]->seq;
}
seq[i] = fences[i]->seq;
}
r = radeon_fence_wait_any_seq(rdev, seq, intr);
diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index 493a7be..a0b9970 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -74,13 +74,9 @@ int radeon_ib_get(struct radeon_device *rdev, int ring, dev_err(rdev->dev, "failed to get a new IB (%d)\n", r); return r; }
- r = radeon_fence_create(rdev, &ib->fence, ring);
- if (r) {
dev_err(rdev->dev, "failed to create fence for new IB (%d)\n", r);
radeon_sa_bo_free(rdev, &ib->sa_bo, NULL);
return r;
- }
- ib->ring = ring;
- ib->fence = NULL; ib->ptr = radeon_sa_bo_cpu_addr(ib->sa_bo); ib->gpu_addr = radeon_sa_bo_gpu_addr(ib->sa_bo); ib->vm_id = 0;
@@ -99,7 +95,7 @@ void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib *ib)
int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib) {
- struct radeon_ring *ring = &rdev->ring[ib->fence->ring];
struct radeon_ring *ring = &rdev->ring[ib->ring]; int r = 0;
if (!ib->length_dw || !ring->ready) {
@@ -114,8 +110,13 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib) dev_err(rdev->dev, "scheduling IB failed (%d).\n", r); return r; }
- radeon_ring_ib_execute(rdev, ib->fence->ring, ib);
- radeon_fence_emit(rdev, ib->fence);
- radeon_ring_ib_execute(rdev, ib->ring, ib);
- r = radeon_fence_emit(rdev, &ib->fence, ib->ring);
- if (r) {
dev_err(rdev->dev, "failed to emit fence for new IB (%d)\n", r);
radeon_ring_unlock_undo(rdev, ring);
return r;
- } radeon_ring_unlock_commit(rdev, ring); return 0;
} diff --git a/drivers/gpu/drm/radeon/radeon_sa.c b/drivers/gpu/drm/radeon/radeon_sa.c index 32059b7..81dbb5b 100644 --- a/drivers/gpu/drm/radeon/radeon_sa.c +++ b/drivers/gpu/drm/radeon/radeon_sa.c @@ -349,7 +349,7 @@ void radeon_sa_bo_free(struct radeon_device *rdev, struct radeon_sa_bo **sa_bo,
sa_manager = (*sa_bo)->manager; spin_lock(&sa_manager->lock);
- if (fence && fence->seq && fence->seq < RADEON_FENCE_NOTEMITED_SEQ) {
- if (fence && !radeon_fence_signaled(fence)) { (*sa_bo)->fence = radeon_fence_ref(fence); list_add_tail(&(*sa_bo)->flist, &sa_manager->flist[fence->ring]);
diff --git a/drivers/gpu/drm/radeon/radeon_test.c b/drivers/gpu/drm/radeon/radeon_test.c index efff929..47e1535 100644 --- a/drivers/gpu/drm/radeon/radeon_test.c +++ b/drivers/gpu/drm/radeon/radeon_test.c @@ -106,13 +106,7 @@ void radeon_test_moves(struct radeon_device *rdev)
radeon_bo_kunmap(gtt_obj[i]);
r = radeon_fence_create(rdev, &fence, RADEON_RING_TYPE_GFX_INDEX);
if (r) {
DRM_ERROR("Failed to create GTT->VRAM fence %d\n", i);
goto out_cleanup;
}
r = radeon_copy(rdev, gtt_addr, vram_addr, size / RADEON_GPU_PAGE_SIZE, fence);
if (r) { DRM_ERROR("Failed GTT->VRAM copy %d\n", i); goto out_cleanup;r = radeon_copy(rdev, gtt_addr, vram_addr, size / RADEON_GPU_PAGE_SIZE, &fence);
@@ -155,13 +149,7 @@ void radeon_test_moves(struct radeon_device *rdev)
radeon_bo_kunmap(vram_obj);
r = radeon_fence_create(rdev, &fence, RADEON_RING_TYPE_GFX_INDEX);
if (r) {
DRM_ERROR("Failed to create VRAM->GTT fence %d\n", i);
goto out_cleanup;
}
r = radeon_copy(rdev, vram_addr, gtt_addr, size / RADEON_GPU_PAGE_SIZE, fence);
if (r) { DRM_ERROR("Failed VRAM->GTT copy %d\n", i); goto out_cleanup;r = radeon_copy(rdev, vram_addr, gtt_addr, size / RADEON_GPU_PAGE_SIZE, &fence);
@@ -245,17 +233,6 @@ void radeon_test_ring_sync(struct radeon_device *rdev, int ridxB = radeon_ring_index(rdev, ringB); int r;
- r = radeon_fence_create(rdev, &fence1, ridxA);
- if (r) {
DRM_ERROR("Failed to create sync fence 1\n");
goto out_cleanup;
- }
- r = radeon_fence_create(rdev, &fence2, ridxA);
- if (r) {
DRM_ERROR("Failed to create sync fence 2\n");
goto out_cleanup;
- }
- r = radeon_semaphore_create(rdev, &semaphore); if (r) { DRM_ERROR("Failed to create semaphore\n");
@@ -268,9 +245,19 @@ void radeon_test_ring_sync(struct radeon_device *rdev, goto out_cleanup; } radeon_semaphore_emit_wait(rdev, ridxA, semaphore);
- radeon_fence_emit(rdev, fence1);
- r = radeon_fence_emit(rdev, &fence1, ridxA);
- if (r) {
DRM_ERROR("Failed to emit fence 1\n");
radeon_ring_unlock_undo(rdev, ringA);
goto out_cleanup;
- } radeon_semaphore_emit_wait(rdev, ridxA, semaphore);
- radeon_fence_emit(rdev, fence2);
r = radeon_fence_emit(rdev, &fence2, ridxA);
if (r) {
DRM_ERROR("Failed to emit fence 2\n");
radeon_ring_unlock_undo(rdev, ringA);
goto out_cleanup;
} radeon_ring_unlock_commit(rdev, ringA);
mdelay(1000);
@@ -342,17 +329,6 @@ void radeon_test_ring_sync2(struct radeon_device *rdev, bool sigA, sigB; int i, r;
- r = radeon_fence_create(rdev, &fenceA, ridxA);
- if (r) {
DRM_ERROR("Failed to create sync fence 1\n");
goto out_cleanup;
- }
- r = radeon_fence_create(rdev, &fenceB, ridxB);
- if (r) {
DRM_ERROR("Failed to create sync fence 2\n");
goto out_cleanup;
- }
- r = radeon_semaphore_create(rdev, &semaphore); if (r) { DRM_ERROR("Failed to create semaphore\n");
@@ -365,7 +341,12 @@ void radeon_test_ring_sync2(struct radeon_device *rdev, goto out_cleanup; } radeon_semaphore_emit_wait(rdev, ridxA, semaphore);
- radeon_fence_emit(rdev, fenceA);
r = radeon_fence_emit(rdev, &fenceA, ridxA);
if (r) {
DRM_ERROR("Failed to emit sync fence 1\n");
radeon_ring_unlock_undo(rdev, ringA);
goto out_cleanup;
} radeon_ring_unlock_commit(rdev, ringA);
r = radeon_ring_lock(rdev, ringB, 64);
@@ -374,7 +355,12 @@ void radeon_test_ring_sync2(struct radeon_device *rdev, goto out_cleanup; } radeon_semaphore_emit_wait(rdev, ridxB, semaphore);
- radeon_fence_emit(rdev, fenceB);
r = radeon_fence_emit(rdev, &fenceB, ridxB);
if (r) {
DRM_ERROR("Failed to create sync fence 2\n");
radeon_ring_unlock_undo(rdev, ringB);
goto out_cleanup;
} radeon_ring_unlock_commit(rdev, ringB);
mdelay(1000);
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index c94a225..2d36bdd 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -222,15 +222,12 @@ static int radeon_move_blit(struct ttm_buffer_object *bo, { struct radeon_device *rdev; uint64_t old_start, new_start;
- struct radeon_fence *fence, *old_fence;
- struct radeon_fence *fence; struct radeon_semaphore *sem = NULL;
- int r;
int r, ridx;
rdev = radeon_get_rdev(bo->bdev);
- r = radeon_fence_create(rdev, &fence, radeon_copy_ring_index(rdev));
- if (unlikely(r)) {
return r;
- }
- ridx = radeon_copy_ring_index(rdev); old_start = old_mem->start << PAGE_SHIFT; new_start = new_mem->start << PAGE_SHIFT;
@@ -243,7 +240,6 @@ static int radeon_move_blit(struct ttm_buffer_object *bo, break; default: DRM_ERROR("Unknown placement %d\n", old_mem->mem_type);
return -EINVAL; } switch (new_mem->mem_type) {radeon_fence_unref(&fence);
@@ -255,42 +251,38 @@ static int radeon_move_blit(struct ttm_buffer_object *bo, break; default: DRM_ERROR("Unknown placement %d\n", old_mem->mem_type);
return -EINVAL; }radeon_fence_unref(&fence);
- if (!rdev->ring[radeon_copy_ring_index(rdev)].ready) {
- if (!rdev->ring[ridx].ready) { DRM_ERROR("Trying to move memory with ring turned off.\n");
radeon_fence_unref(&fence);
return -EINVAL; }
BUILD_BUG_ON((PAGE_SIZE % RADEON_GPU_PAGE_SIZE) != 0);
/* sync other rings */
old_fence = bo->sync_obj;
if (old_fence && old_fence->ring != fence->ring
&& !radeon_fence_signaled(old_fence)) {
- fence = bo->sync_obj;
- if (fence && fence->ring != ridx
bool sync_to_ring[RADEON_NUM_RINGS] = { };&& !radeon_fence_signaled(fence)) {
sync_to_ring[old_fence->ring] = true;
sync_to_ring[fence->ring] = true;
r = radeon_semaphore_create(rdev, &sem); if (r) {
radeon_fence_unref(&fence); return r;
}
r = radeon_semaphore_sync_rings(rdev, sem,
sync_to_ring, fence->ring);
if (r) { radeon_semaphore_free(rdev, sem, NULL);r = radeon_semaphore_sync_rings(rdev, sem, sync_to_ring, ridx);
} }radeon_fence_unref(&fence); return r;
- fence = NULL; r = radeon_copy(rdev, old_start, new_start, new_mem->num_pages * (PAGE_SIZE / RADEON_GPU_PAGE_SIZE), /* GPU pages */
fence);
/* FIXME: handle copy error */ r = ttm_bo_move_accel_cleanup(bo, (void *)fence, NULL, evict, no_wait_reserve, no_wait_gpu, new_mem);&fence);
diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index 549732e..5ca8ef5 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -1915,7 +1915,7 @@ void si_fence_ring_emit(struct radeon_device *rdev, */ void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) {
- struct radeon_ring *ring = &rdev->ring[ib->fence->ring];
struct radeon_ring *ring = &rdev->ring[ib->ring]; u32 header;
if (ib->is_const_ib)
@@ -2855,7 +2855,7 @@ int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib) if (ib->is_const_ib) ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt); else {
switch (ib->fence->ring) {
switch (ib->ring) { case RADEON_RING_TYPE_GFX_INDEX: ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt); break;
@@ -2864,7 +2864,7 @@ int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib) ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt); break; default:
dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->fence->ring);
dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring); ret = -EINVAL; break; }
-- 1.7.9.5
dri-devel@lists.freedesktop.org