Creating fences before they are emitted is completely unnecessary, so remove radeon_fence_create along with a number of checks for whether a fence has been emitted or not.
Signed-off-by: Christian König deathsimple@vodafone.de --- drivers/gpu/drm/radeon/evergreen.c | 2 +- drivers/gpu/drm/radeon/ni.c | 2 +- drivers/gpu/drm/radeon/r100.c | 4 +- drivers/gpu/drm/radeon/r200.c | 4 +- drivers/gpu/drm/radeon/r600.c | 4 +- drivers/gpu/drm/radeon/r600_blit_kms.c | 6 +-- drivers/gpu/drm/radeon/radeon.h | 11 +++-- drivers/gpu/drm/radeon/radeon_asic.h | 8 ++-- drivers/gpu/drm/radeon/radeon_benchmark.c | 10 +---- drivers/gpu/drm/radeon/radeon_fence.c | 42 ++++++------------ drivers/gpu/drm/radeon/radeon_ring.c | 19 +++++---- drivers/gpu/drm/radeon/radeon_sa.c | 2 +- drivers/gpu/drm/radeon/radeon_test.c | 66 ++++++++++++----------------- drivers/gpu/drm/radeon/radeon_ttm.c | 30 +++++-------- drivers/gpu/drm/radeon/si.c | 6 +-- 15 files changed, 86 insertions(+), 130 deletions(-)
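For illustration, a minimal before/after sketch of a typical call site, using the names from the diff below (error handling and the actual ring commands are elided):

/* before: the fence object had to exist before anything was emitted */
struct radeon_fence *fence;
int r = radeon_fence_create(rdev, &fence, RADEON_RING_TYPE_GFX_INDEX);
if (r)
        return r;
/* ... write commands to the ring ... */
r = radeon_fence_emit(rdev, fence);

/* after: allocation and emission are a single step */
struct radeon_fence *fence = NULL;
/* ... write commands to the ring ... */
int r = radeon_fence_emit(rdev, &fence, RADEON_RING_TYPE_GFX_INDEX);
if (r)
        return r;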
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index 58991af..dd3cea4 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -1366,7 +1366,7 @@ void evergreen_mc_program(struct radeon_device *rdev) */ void evergreen_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) { - struct radeon_ring *ring = &rdev->ring[ib->fence->ring]; + struct radeon_ring *ring = &rdev->ring[ib->ring];
/* set to DX10/11 mode */ radeon_ring_write(ring, PACKET3(PACKET3_MODE_CONTROL, 0)); diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index b01c2dd..9d9f5ac 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -1127,7 +1127,7 @@ void cayman_fence_ring_emit(struct radeon_device *rdev,
void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) { - struct radeon_ring *ring = &rdev->ring[ib->fence->ring]; + struct radeon_ring *ring = &rdev->ring[ib->ring];
/* set to DX10/11 mode */ radeon_ring_write(ring, PACKET3(PACKET3_MODE_CONTROL, 0)); diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index fb44e7e..415b7d8 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -883,7 +883,7 @@ int r100_copy_blit(struct radeon_device *rdev, uint64_t src_offset, uint64_t dst_offset, unsigned num_gpu_pages, - struct radeon_fence *fence) + struct radeon_fence **fence) { struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; uint32_t cur_pages; @@ -947,7 +947,7 @@ int r100_copy_blit(struct radeon_device *rdev, RADEON_WAIT_HOST_IDLECLEAN | RADEON_WAIT_DMA_GUI_IDLE); if (fence) { - r = radeon_fence_emit(rdev, fence); + r = radeon_fence_emit(rdev, fence, RADEON_RING_TYPE_GFX_INDEX); } radeon_ring_unlock_commit(rdev, ring); return r; diff --git a/drivers/gpu/drm/radeon/r200.c b/drivers/gpu/drm/radeon/r200.c index a26144d..f088925 100644 --- a/drivers/gpu/drm/radeon/r200.c +++ b/drivers/gpu/drm/radeon/r200.c @@ -85,7 +85,7 @@ int r200_copy_dma(struct radeon_device *rdev, uint64_t src_offset, uint64_t dst_offset, unsigned num_gpu_pages, - struct radeon_fence *fence) + struct radeon_fence **fence) { struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; uint32_t size; @@ -120,7 +120,7 @@ int r200_copy_dma(struct radeon_device *rdev, radeon_ring_write(ring, PACKET0(RADEON_WAIT_UNTIL, 0)); radeon_ring_write(ring, RADEON_WAIT_DMA_GUI_IDLE); if (fence) { - r = radeon_fence_emit(rdev, fence); + r = radeon_fence_emit(rdev, fence, RADEON_RING_TYPE_GFX_INDEX); } radeon_ring_unlock_commit(rdev, ring); return r; diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index f388a1d..e5279f9 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -2369,7 +2369,7 @@ int r600_copy_blit(struct radeon_device *rdev, uint64_t src_offset, uint64_t dst_offset, unsigned num_gpu_pages, - struct radeon_fence *fence) + struct radeon_fence **fence) { struct radeon_sa_bo *vb = NULL; int r; @@ -2670,7 +2670,7 @@ void r600_fini(struct radeon_device *rdev) */ void r600_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) { - struct radeon_ring *ring = &rdev->ring[ib->fence->ring]; + struct radeon_ring *ring = &rdev->ring[ib->ring];
/* FIXME: implement */ radeon_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); diff --git a/drivers/gpu/drm/radeon/r600_blit_kms.c b/drivers/gpu/drm/radeon/r600_blit_kms.c index 03b6e0d..02f4eeb 100644 --- a/drivers/gpu/drm/radeon/r600_blit_kms.c +++ b/drivers/gpu/drm/radeon/r600_blit_kms.c @@ -703,20 +703,20 @@ int r600_blit_prepare_copy(struct radeon_device *rdev, unsigned num_gpu_pages, return 0; }
-void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence *fence, +void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence **fence, struct radeon_sa_bo *vb) { struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; int r;
- r = radeon_fence_emit(rdev, fence); + r = radeon_fence_emit(rdev, fence, RADEON_RING_TYPE_GFX_INDEX); if (r) { radeon_ring_unlock_undo(rdev, ring); return; }
radeon_ring_unlock_commit(rdev, ring); - radeon_sa_bo_free(rdev, &vb, fence); + radeon_sa_bo_free(rdev, &vb, *fence); }
void r600_kms_blit_copy(struct radeon_device *rdev, diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 1dc3a4a..5e259b4 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -113,7 +113,6 @@ extern int radeon_lockup_timeout;
/* fence seq are set to this number when signaled */ #define RADEON_FENCE_SIGNALED_SEQ 0LL -#define RADEON_FENCE_NOTEMITED_SEQ (~0LL)
/* internal ring indices */ /* r1xx+ has gfx CP ring */ @@ -277,8 +276,7 @@ struct radeon_fence { int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring); int radeon_fence_driver_init(struct radeon_device *rdev); void radeon_fence_driver_fini(struct radeon_device *rdev); -int radeon_fence_create(struct radeon_device *rdev, struct radeon_fence **fence, int ring); -int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence *fence); +int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence **fence, int ring); void radeon_fence_process(struct radeon_device *rdev, int ring); bool radeon_fence_signaled(struct radeon_fence *fence); int radeon_fence_wait(struct radeon_fence *fence, bool interruptible); @@ -627,6 +625,7 @@ struct radeon_ib { uint32_t length_dw; uint64_t gpu_addr; uint32_t *ptr; + int ring; struct radeon_fence *fence; unsigned vm_id; bool is_const_ib; @@ -1190,20 +1189,20 @@ struct radeon_asic { uint64_t src_offset, uint64_t dst_offset, unsigned num_gpu_pages, - struct radeon_fence *fence); + struct radeon_fence **fence); u32 blit_ring_index; int (*dma)(struct radeon_device *rdev, uint64_t src_offset, uint64_t dst_offset, unsigned num_gpu_pages, - struct radeon_fence *fence); + struct radeon_fence **fence); u32 dma_ring_index; /* method used for bo copy */ int (*copy)(struct radeon_device *rdev, uint64_t src_offset, uint64_t dst_offset, unsigned num_gpu_pages, - struct radeon_fence *fence); + struct radeon_fence **fence); /* ring used for bo copies */ u32 copy_ring_index; } copy; diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index e76a941..8cdf075 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -79,7 +79,7 @@ int r100_copy_blit(struct radeon_device *rdev, uint64_t src_offset, uint64_t dst_offset, unsigned num_gpu_pages, - struct radeon_fence *fence); + struct radeon_fence **fence); int r100_set_surface_reg(struct radeon_device *rdev, int reg, uint32_t tiling_flags, uint32_t pitch, uint32_t offset, uint32_t obj_size); @@ -144,7 +144,7 @@ extern int r200_copy_dma(struct radeon_device *rdev, uint64_t src_offset, uint64_t dst_offset, unsigned num_gpu_pages, - struct radeon_fence *fence); + struct radeon_fence **fence); void r200_set_safe_registers(struct radeon_device *rdev);
/* @@ -318,7 +318,7 @@ void r600_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib); int r600_ring_test(struct radeon_device *rdev, struct radeon_ring *cp); int r600_copy_blit(struct radeon_device *rdev, uint64_t src_offset, uint64_t dst_offset, - unsigned num_gpu_pages, struct radeon_fence *fence); + unsigned num_gpu_pages, struct radeon_fence **fence); void r600_hpd_init(struct radeon_device *rdev); void r600_hpd_fini(struct radeon_device *rdev); bool r600_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd); @@ -364,7 +364,7 @@ void r600_hdmi_update_audio_settings(struct drm_encoder *encoder); /* r600 blit */ int r600_blit_prepare_copy(struct radeon_device *rdev, unsigned num_gpu_pages, struct radeon_sa_bo **vb); -void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence *fence, +void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence **fence, struct radeon_sa_bo *vb); void r600_kms_blit_copy(struct radeon_device *rdev, u64 src_gpu_addr, u64 dst_gpu_addr, diff --git a/drivers/gpu/drm/radeon/radeon_benchmark.c b/drivers/gpu/drm/radeon/radeon_benchmark.c index 364f5b1..bedda9c 100644 --- a/drivers/gpu/drm/radeon/radeon_benchmark.c +++ b/drivers/gpu/drm/radeon/radeon_benchmark.c @@ -45,20 +45,14 @@ static int radeon_benchmark_do_move(struct radeon_device *rdev, unsigned size, for (i = 0; i < n; i++) { switch (flag) { case RADEON_BENCHMARK_COPY_DMA: - r = radeon_fence_create(rdev, &fence, radeon_copy_dma_ring_index(rdev)); - if (r) - return r; r = radeon_copy_dma(rdev, saddr, daddr, size / RADEON_GPU_PAGE_SIZE, - fence); + &fence); break; case RADEON_BENCHMARK_COPY_BLIT: - r = radeon_fence_create(rdev, &fence, radeon_copy_blit_ring_index(rdev)); - if (r) - return r; r = radeon_copy_blit(rdev, saddr, daddr, size / RADEON_GPU_PAGE_SIZE, - fence); + &fence); break; default: DRM_ERROR("Unknown copy method\n"); diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c index 11f5f40..401d346 100644 --- a/drivers/gpu/drm/radeon/radeon_fence.c +++ b/drivers/gpu/drm/radeon/radeon_fence.c @@ -61,15 +61,21 @@ static u32 radeon_fence_read(struct radeon_device *rdev, int ring) return seq; }
-int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence *fence) +int radeon_fence_emit(struct radeon_device *rdev, + struct radeon_fence **fence, + int ring) { /* we are protected by the ring emission mutex */ - if (fence->seq && fence->seq < RADEON_FENCE_NOTEMITED_SEQ) { - return 0; + *fence = kmalloc(sizeof(struct radeon_fence), GFP_KERNEL); + if ((*fence) == NULL) { + return -ENOMEM; } - fence->seq = ++rdev->fence_drv[fence->ring].seq; - radeon_fence_ring_emit(rdev, fence->ring, fence); - trace_radeon_fence_emit(rdev->ddev, fence->seq); + kref_init(&((*fence)->kref)); + (*fence)->rdev = rdev; + (*fence)->seq = ++rdev->fence_drv[ring].seq; + (*fence)->ring = ring; + radeon_fence_ring_emit(rdev, ring, *fence); + trace_radeon_fence_emit(rdev->ddev, (*fence)->seq); return 0; }
@@ -138,25 +144,9 @@ static void radeon_fence_destroy(struct kref *kref) struct radeon_fence *fence;
fence = container_of(kref, struct radeon_fence, kref); - fence->seq = RADEON_FENCE_NOTEMITED_SEQ; kfree(fence); }
-int radeon_fence_create(struct radeon_device *rdev, - struct radeon_fence **fence, - int ring) -{ - *fence = kmalloc(sizeof(struct radeon_fence), GFP_KERNEL); - if ((*fence) == NULL) { - return -ENOMEM; - } - kref_init(&((*fence)->kref)); - (*fence)->rdev = rdev; - (*fence)->seq = RADEON_FENCE_NOTEMITED_SEQ; - (*fence)->ring = ring; - return 0; -} - static bool radeon_fence_seq_signaled(struct radeon_device *rdev, u64 seq, unsigned ring) { @@ -176,10 +166,6 @@ bool radeon_fence_signaled(struct radeon_fence *fence) if (!fence) { return true; } - if (fence->seq == RADEON_FENCE_NOTEMITED_SEQ) { - WARN(1, "Querying an unemitted fence : %p !\n", fence); - return true; - } if (fence->seq == RADEON_FENCE_SIGNALED_SEQ) { return true; } @@ -444,9 +430,7 @@ int radeon_fence_wait_any(struct radeon_device *rdev, return 0; }
- if (fences[i]->seq < RADEON_FENCE_NOTEMITED_SEQ) { - seq[i] = fences[i]->seq; - } + seq[i] = fences[i]->seq; }
r = radeon_fence_wait_any_seq(rdev, seq, intr); diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index 493a7be..a0b9970 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -74,13 +74,9 @@ int radeon_ib_get(struct radeon_device *rdev, int ring, dev_err(rdev->dev, "failed to get a new IB (%d)\n", r); return r; } - r = radeon_fence_create(rdev, &ib->fence, ring); - if (r) { - dev_err(rdev->dev, "failed to create fence for new IB (%d)\n", r); - radeon_sa_bo_free(rdev, &ib->sa_bo, NULL); - return r; - }
+ ib->ring = ring; + ib->fence = NULL; ib->ptr = radeon_sa_bo_cpu_addr(ib->sa_bo); ib->gpu_addr = radeon_sa_bo_gpu_addr(ib->sa_bo); ib->vm_id = 0; @@ -99,7 +95,7 @@ void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib *ib)
int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib) { - struct radeon_ring *ring = &rdev->ring[ib->fence->ring]; + struct radeon_ring *ring = &rdev->ring[ib->ring]; int r = 0;
if (!ib->length_dw || !ring->ready) { @@ -114,8 +110,13 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib) dev_err(rdev->dev, "scheduling IB failed (%d).\n", r); return r; } - radeon_ring_ib_execute(rdev, ib->fence->ring, ib); - radeon_fence_emit(rdev, ib->fence); + radeon_ring_ib_execute(rdev, ib->ring, ib); + r = radeon_fence_emit(rdev, &ib->fence, ib->ring); + if (r) { + dev_err(rdev->dev, "failed to emit fence for new IB (%d)\n", r); + radeon_ring_unlock_undo(rdev, ring); + return r; + } radeon_ring_unlock_commit(rdev, ring); return 0; } diff --git a/drivers/gpu/drm/radeon/radeon_sa.c b/drivers/gpu/drm/radeon/radeon_sa.c index 32059b7..81dbb5b 100644 --- a/drivers/gpu/drm/radeon/radeon_sa.c +++ b/drivers/gpu/drm/radeon/radeon_sa.c @@ -349,7 +349,7 @@ void radeon_sa_bo_free(struct radeon_device *rdev, struct radeon_sa_bo **sa_bo,
sa_manager = (*sa_bo)->manager; spin_lock(&sa_manager->lock); - if (fence && fence->seq && fence->seq < RADEON_FENCE_NOTEMITED_SEQ) { + if (fence && !radeon_fence_signaled(fence)) { (*sa_bo)->fence = radeon_fence_ref(fence); list_add_tail(&(*sa_bo)->flist, &sa_manager->flist[fence->ring]); diff --git a/drivers/gpu/drm/radeon/radeon_test.c b/drivers/gpu/drm/radeon/radeon_test.c index efff929..47e1535 100644 --- a/drivers/gpu/drm/radeon/radeon_test.c +++ b/drivers/gpu/drm/radeon/radeon_test.c @@ -106,13 +106,7 @@ void radeon_test_moves(struct radeon_device *rdev)
radeon_bo_kunmap(gtt_obj[i]);
- r = radeon_fence_create(rdev, &fence, RADEON_RING_TYPE_GFX_INDEX); - if (r) { - DRM_ERROR("Failed to create GTT->VRAM fence %d\n", i); - goto out_cleanup; - } - - r = radeon_copy(rdev, gtt_addr, vram_addr, size / RADEON_GPU_PAGE_SIZE, fence); + r = radeon_copy(rdev, gtt_addr, vram_addr, size / RADEON_GPU_PAGE_SIZE, &fence); if (r) { DRM_ERROR("Failed GTT->VRAM copy %d\n", i); goto out_cleanup; @@ -155,13 +149,7 @@ void radeon_test_moves(struct radeon_device *rdev)
radeon_bo_kunmap(vram_obj);
- r = radeon_fence_create(rdev, &fence, RADEON_RING_TYPE_GFX_INDEX); - if (r) { - DRM_ERROR("Failed to create VRAM->GTT fence %d\n", i); - goto out_cleanup; - } - - r = radeon_copy(rdev, vram_addr, gtt_addr, size / RADEON_GPU_PAGE_SIZE, fence); + r = radeon_copy(rdev, vram_addr, gtt_addr, size / RADEON_GPU_PAGE_SIZE, &fence); if (r) { DRM_ERROR("Failed VRAM->GTT copy %d\n", i); goto out_cleanup; @@ -245,17 +233,6 @@ void radeon_test_ring_sync(struct radeon_device *rdev, int ridxB = radeon_ring_index(rdev, ringB); int r;
- r = radeon_fence_create(rdev, &fence1, ridxA); - if (r) { - DRM_ERROR("Failed to create sync fence 1\n"); - goto out_cleanup; - } - r = radeon_fence_create(rdev, &fence2, ridxA); - if (r) { - DRM_ERROR("Failed to create sync fence 2\n"); - goto out_cleanup; - } - r = radeon_semaphore_create(rdev, &semaphore); if (r) { DRM_ERROR("Failed to create semaphore\n"); @@ -268,9 +245,19 @@ void radeon_test_ring_sync(struct radeon_device *rdev, goto out_cleanup; } radeon_semaphore_emit_wait(rdev, ridxA, semaphore); - radeon_fence_emit(rdev, fence1); + r = radeon_fence_emit(rdev, &fence1, ridxA); + if (r) { + DRM_ERROR("Failed to emit fence 1\n"); + radeon_ring_unlock_undo(rdev, ringA); + goto out_cleanup; + } radeon_semaphore_emit_wait(rdev, ridxA, semaphore); - radeon_fence_emit(rdev, fence2); + r = radeon_fence_emit(rdev, &fence2, ridxA); + if (r) { + DRM_ERROR("Failed to emit fence 2\n"); + radeon_ring_unlock_undo(rdev, ringA); + goto out_cleanup; + } radeon_ring_unlock_commit(rdev, ringA);
mdelay(1000); @@ -342,17 +329,6 @@ void radeon_test_ring_sync2(struct radeon_device *rdev, bool sigA, sigB; int i, r;
- r = radeon_fence_create(rdev, &fenceA, ridxA); - if (r) { - DRM_ERROR("Failed to create sync fence 1\n"); - goto out_cleanup; - } - r = radeon_fence_create(rdev, &fenceB, ridxB); - if (r) { - DRM_ERROR("Failed to create sync fence 2\n"); - goto out_cleanup; - } - r = radeon_semaphore_create(rdev, &semaphore); if (r) { DRM_ERROR("Failed to create semaphore\n"); @@ -365,7 +341,12 @@ void radeon_test_ring_sync2(struct radeon_device *rdev, goto out_cleanup; } radeon_semaphore_emit_wait(rdev, ridxA, semaphore); - radeon_fence_emit(rdev, fenceA); + r = radeon_fence_emit(rdev, &fenceA, ridxA); + if (r) { + DRM_ERROR("Failed to emit sync fence 1\n"); + radeon_ring_unlock_undo(rdev, ringA); + goto out_cleanup; + } radeon_ring_unlock_commit(rdev, ringA);
r = radeon_ring_lock(rdev, ringB, 64); @@ -374,7 +355,12 @@ void radeon_test_ring_sync2(struct radeon_device *rdev, goto out_cleanup; } radeon_semaphore_emit_wait(rdev, ridxB, semaphore); - radeon_fence_emit(rdev, fenceB); + r = radeon_fence_emit(rdev, &fenceB, ridxB); + if (r) { + DRM_ERROR("Failed to create sync fence 2\n"); + radeon_ring_unlock_undo(rdev, ringB); + goto out_cleanup; + } radeon_ring_unlock_commit(rdev, ringB);
mdelay(1000); diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index c94a225..2d36bdd 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -222,15 +222,12 @@ static int radeon_move_blit(struct ttm_buffer_object *bo, { struct radeon_device *rdev; uint64_t old_start, new_start; - struct radeon_fence *fence, *old_fence; + struct radeon_fence *fence; struct radeon_semaphore *sem = NULL; - int r; + int r, ridx;
rdev = radeon_get_rdev(bo->bdev); - r = radeon_fence_create(rdev, &fence, radeon_copy_ring_index(rdev)); - if (unlikely(r)) { - return r; - } + ridx = radeon_copy_ring_index(rdev); old_start = old_mem->start << PAGE_SHIFT; new_start = new_mem->start << PAGE_SHIFT;
@@ -243,7 +240,6 @@ static int radeon_move_blit(struct ttm_buffer_object *bo, break; default: DRM_ERROR("Unknown placement %d\n", old_mem->mem_type); - radeon_fence_unref(&fence); return -EINVAL; } switch (new_mem->mem_type) { @@ -255,42 +251,38 @@ static int radeon_move_blit(struct ttm_buffer_object *bo, break; default: DRM_ERROR("Unknown placement %d\n", old_mem->mem_type); - radeon_fence_unref(&fence); return -EINVAL; } - if (!rdev->ring[radeon_copy_ring_index(rdev)].ready) { + if (!rdev->ring[ridx].ready) { DRM_ERROR("Trying to move memory with ring turned off.\n"); - radeon_fence_unref(&fence); return -EINVAL; }
BUILD_BUG_ON((PAGE_SIZE % RADEON_GPU_PAGE_SIZE) != 0);
/* sync other rings */ - old_fence = bo->sync_obj; - if (old_fence && old_fence->ring != fence->ring - && !radeon_fence_signaled(old_fence)) { + fence = bo->sync_obj; + if (fence && fence->ring != ridx + && !radeon_fence_signaled(fence)) { bool sync_to_ring[RADEON_NUM_RINGS] = { }; - sync_to_ring[old_fence->ring] = true; + sync_to_ring[fence->ring] = true;
r = radeon_semaphore_create(rdev, &sem); if (r) { - radeon_fence_unref(&fence); return r; }
- r = radeon_semaphore_sync_rings(rdev, sem, - sync_to_ring, fence->ring); + r = radeon_semaphore_sync_rings(rdev, sem, sync_to_ring, ridx); if (r) { radeon_semaphore_free(rdev, sem, NULL); - radeon_fence_unref(&fence); return r; } }
+ fence = NULL; r = radeon_copy(rdev, old_start, new_start, new_mem->num_pages * (PAGE_SIZE / RADEON_GPU_PAGE_SIZE), /* GPU pages */ - fence); + &fence); /* FIXME: handle copy error */ r = ttm_bo_move_accel_cleanup(bo, (void *)fence, NULL, evict, no_wait_reserve, no_wait_gpu, new_mem); diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index 549732e..5ca8ef5 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -1915,7 +1915,7 @@ void si_fence_ring_emit(struct radeon_device *rdev, */ void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) { - struct radeon_ring *ring = &rdev->ring[ib->fence->ring]; + struct radeon_ring *ring = &rdev->ring[ib->ring]; u32 header;
if (ib->is_const_ib) @@ -2855,7 +2855,7 @@ int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib) if (ib->is_const_ib) ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt); else { - switch (ib->fence->ring) { + switch (ib->ring) { case RADEON_RING_TYPE_GFX_INDEX: ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt); break; @@ -2864,7 +2864,7 @@ int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib) ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt); break; default: - dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->fence->ring); + dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring); ret = -EINVAL; break; }
Signed-off-by: Christian König deathsimple@vodafone.de --- drivers/gpu/drm/radeon/radeon.h | 23 ++++++++++- drivers/gpu/drm/radeon/radeon_fence.c | 73 +++++++++++++++++++++++++++++---- 2 files changed, 85 insertions(+), 11 deletions(-)
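Roughly, fence_drv[i].sync_seq[j] tracks the last sequence number from ring j that ring i is known to have synced to, and sync_seq[i] on ring i itself doubles as the last emitted sequence number. A sketch of how the two new helpers are meant to be used together, modeled on a later patch in this series (sem is an already allocated semaphore):

/* make dst_ring wait for 'fence' only if it hasn't synced that far yet */
if (radeon_fence_need_sync(fence, dst_ring)) {
        radeon_semaphore_sync_rings(rdev, sem, fence->ring, dst_ring);
        radeon_fence_note_sync(fence, dst_ring);
}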
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 5e259b4..4e232c3 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -257,8 +257,8 @@ struct radeon_fence_driver { uint32_t scratch_reg; uint64_t gpu_addr; volatile uint32_t *cpu_addr; - /* seq is protected by ring emission lock */ - uint64_t seq; + /* sync_seq is protected by ring emission lock */ + uint64_t sync_seq[RADEON_NUM_RINGS]; atomic64_t last_seq; unsigned long last_activity; bool initialized; @@ -288,6 +288,25 @@ int radeon_fence_wait_any(struct radeon_device *rdev, struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence); void radeon_fence_unref(struct radeon_fence **fence); unsigned radeon_fence_count_emitted(struct radeon_device *rdev, int ring); +bool radeon_fence_need_sync(struct radeon_fence *fence, int ring); +void radeon_fence_note_sync(struct radeon_fence *fence, int ring); +static inline struct radeon_fence *radeon_fence_later(struct radeon_fence *a, + struct radeon_fence *b) +{ + if (!a) { + return b; + } + + if (!b) { + return a; + } + + if (a->seq > b->seq) { + return a; + } else { + return b; + } +}
/* * Tiling registers diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c index 401d346..7b55625 100644 --- a/drivers/gpu/drm/radeon/radeon_fence.c +++ b/drivers/gpu/drm/radeon/radeon_fence.c @@ -72,7 +72,7 @@ int radeon_fence_emit(struct radeon_device *rdev, } kref_init(&((*fence)->kref)); (*fence)->rdev = rdev; - (*fence)->seq = ++rdev->fence_drv[ring].seq; + (*fence)->seq = ++rdev->fence_drv[ring].sync_seq[ring]; (*fence)->ring = ring; radeon_fence_ring_emit(rdev, ring, *fence); trace_radeon_fence_emit(rdev->ddev, (*fence)->seq); @@ -449,7 +449,7 @@ int radeon_fence_wait_next_locked(struct radeon_device *rdev, int ring) * wait. */ seq = atomic64_read(&rdev->fence_drv[ring].last_seq) + 1ULL; - if (seq >= rdev->fence_drv[ring].seq) { + if (seq >= rdev->fence_drv[ring].sync_seq[ring]) { /* nothing to wait for, last_seq is already the last emited fence */ return -ENOENT; @@ -464,7 +464,7 @@ int radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring) * activity can be scheduled so there won't be concurrent access * to seq value. */ - return radeon_fence_wait_seq(rdev, rdev->fence_drv[ring].seq, + return radeon_fence_wait_seq(rdev, rdev->fence_drv[ring].sync_seq[ring], ring, false, false); }
@@ -492,7 +492,8 @@ unsigned radeon_fence_count_emitted(struct radeon_device *rdev, int ring) * but it's ok to report slightly wrong fence count here. */ radeon_fence_process(rdev, ring); - emitted = rdev->fence_drv[ring].seq - atomic64_read(&rdev->fence_drv[ring].last_seq); + emitted = rdev->fence_drv[ring].sync_seq[ring] + - atomic64_read(&rdev->fence_drv[ring].last_seq); /* to avoid 32bits warp around */ if (emitted > 0x10000000) { emitted = 0x10000000; @@ -500,6 +501,51 @@ unsigned radeon_fence_count_emitted(struct radeon_device *rdev, int ring) return (unsigned)emitted; }
+bool radeon_fence_need_sync(struct radeon_fence *fence, int dst_ring) +{ + struct radeon_fence_driver *fdrv; + + if (!fence) { + return false; + } + + if (fence->ring == dst_ring) { + return false; + } + + /* we are protected by the ring mutex */ + fdrv = &fence->rdev->fence_drv[dst_ring]; + if (fence->seq <= fdrv->sync_seq[fence->ring]) { + return false; + } + + return true; +} + +void radeon_fence_note_sync(struct radeon_fence *fence, int dst_ring) +{ + struct radeon_fence_driver *dst, *src; + unsigned i; + + if (!fence) { + return; + } + + if (fence->ring == dst_ring) { + return; + } + + /* we are protected by the ring mutex */ + src = &fence->rdev->fence_drv[fence->ring]; + dst = &fence->rdev->fence_drv[dst_ring]; + for (i = 0; i < RADEON_NUM_RINGS; ++i) { + if (i == dst_ring) { + continue; + } + dst->sync_seq[i] = max(dst->sync_seq[i], src->sync_seq[i]); + } +} + int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring) { uint64_t index; @@ -521,7 +567,7 @@ int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring) } rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4]; rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr + index; - radeon_fence_write(rdev, rdev->fence_drv[ring].seq, ring); + radeon_fence_write(rdev, rdev->fence_drv[ring].sync_seq[ring], ring); rdev->fence_drv[ring].initialized = true; dev_info(rdev->dev, "fence driver on ring %d use gpu addr 0x%016llx and cpu addr 0x%p\n", ring, rdev->fence_drv[ring].gpu_addr, rdev->fence_drv[ring].cpu_addr); @@ -530,10 +576,13 @@ int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring)
static void radeon_fence_driver_init_ring(struct radeon_device *rdev, int ring) { + int i; + rdev->fence_drv[ring].scratch_reg = -1; rdev->fence_drv[ring].cpu_addr = NULL; rdev->fence_drv[ring].gpu_addr = 0; - rdev->fence_drv[ring].seq = 0; + for (i = 0; i < RADEON_NUM_RINGS; ++i) + rdev->fence_drv[ring].sync_seq[i] = 0; atomic64_set(&rdev->fence_drv[ring].last_seq, 0); rdev->fence_drv[ring].last_activity = jiffies; rdev->fence_drv[ring].initialized = false; @@ -579,7 +628,7 @@ static int radeon_debugfs_fence_info(struct seq_file *m, void *data) struct drm_info_node *node = (struct drm_info_node *)m->private; struct drm_device *dev = node->minor->dev; struct radeon_device *rdev = dev->dev_private; - int i; + int i, j;
for (i = 0; i < RADEON_NUM_RINGS; ++i) { if (!rdev->fence_drv[i].initialized) @@ -588,8 +637,14 @@ static int radeon_debugfs_fence_info(struct seq_file *m, void *data) seq_printf(m, "--- ring %d ---\n", i); seq_printf(m, "Last signaled fence 0x%016llx\n", (unsigned long long)atomic64_read(&rdev->fence_drv[i].last_seq)); - seq_printf(m, "Last emitted 0x%016llx\n", - rdev->fence_drv[i].seq); + seq_printf(m, "Last emitted 0x%016llx\n", + rdev->fence_drv[i].sync_seq[i]); + + for (j = 0; j < RADEON_NUM_RINGS; ++j) { + if (i != j && rdev->fence_drv[j].initialized) + seq_printf(m, "Last sync to ring %d 0x%016llx\n", + j, rdev->fence_drv[i].sync_seq[j]); + } } return 0; }
On Thu, May 31, 2012 at 4:15 PM, Christian König <deathsimple@vodafone.de> wrote:
> Signed-off-by: Christian König <deathsimple@vodafone.de>

[...]

> +static inline struct radeon_fence *radeon_fence_later(struct radeon_fence *a,
> +                                                      struct radeon_fence *b)
> +{
> +        if (!a) {
> +                return b;
> +        }
> +
> +        if (!b) {
> +                return a;
> +        }
Please add: BUG_ON(a->ring != b->ring);
So we can catch it if someone uses this function badly.
> +        if (a->seq > b->seq) {
> +                return a;
> +        } else {
> +                return b;
> +        }
> +}
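A sketch of the helper with the requested check folded in, on the assumption that callers only ever compare fences from the same ring:

static inline struct radeon_fence *radeon_fence_later(struct radeon_fence *a,
                                                      struct radeon_fence *b)
{
        if (!a)
                return b;
        if (!b)
                return a;
        /* sequence numbers are only comparable within a single ring */
        BUG_ON(a->ring != b->ring);
        return (a->seq > b->seq) ? a : b;
}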
Move inter-ring syncing with semaphores into the existing ring allocation, so that we only need to lock the ring mutex once.
Signed-off-by: Christian König deathsimple@vodafone.de --- drivers/gpu/drm/radeon/evergreen_blit_kms.c | 3 +- drivers/gpu/drm/radeon/r600.c | 5 +- drivers/gpu/drm/radeon/r600_blit_kms.c | 24 +++++++-- drivers/gpu/drm/radeon/radeon.h | 6 +-- drivers/gpu/drm/radeon/radeon_asic.h | 5 +- drivers/gpu/drm/radeon/radeon_cs.c | 38 +++----------- drivers/gpu/drm/radeon/radeon_ring.c | 30 +++++++++-- drivers/gpu/drm/radeon/radeon_semaphore.c | 71 ++++++++++----------------- drivers/gpu/drm/radeon/radeon_test.c | 6 +-- drivers/gpu/drm/radeon/radeon_ttm.c | 20 -------- 10 files changed, 92 insertions(+), 116 deletions(-)
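In outline: while parsing relocations each buffer's sync_obj fence is folded into ib.sync_to[], keeping only the latest fence per ring via radeon_fence_later(), and radeon_ib_schedule() then emits the semaphore waits while already holding the single ring lock. A condensed sketch of the two halves, mirroring the diff below:

/* radeon_cs_sync_rings: just accumulate, no locking or allocation */
a = p->relocs[i].robj->tbo.sync_obj;
b = p->ib.sync_to[a->ring];
p->ib.sync_to[a->ring] = radeon_fence_later(a, b);

/* radeon_ib_schedule: emit the waits under the already held ring lock */
for (i = 0; i < RADEON_NUM_RINGS; ++i) {
        struct radeon_fence *fence = ib->sync_to[i];
        if (radeon_fence_need_sync(fence, ib->ring)) {
                radeon_semaphore_sync_rings(rdev, ib->semaphore,
                                            fence->ring, ib->ring);
                radeon_fence_note_sync(fence, ib->ring);
        }
}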
diff --git a/drivers/gpu/drm/radeon/evergreen_blit_kms.c b/drivers/gpu/drm/radeon/evergreen_blit_kms.c index 1e96bd4..e512560 100644 --- a/drivers/gpu/drm/radeon/evergreen_blit_kms.c +++ b/drivers/gpu/drm/radeon/evergreen_blit_kms.c @@ -622,7 +622,8 @@ int evergreen_blit_init(struct radeon_device *rdev) rdev->r600_blit.primitives.draw_auto = draw_auto; rdev->r600_blit.primitives.set_default_state = set_default_state;
- rdev->r600_blit.ring_size_common = 55; /* shaders + def state */ + rdev->r600_blit.ring_size_common = 8; /* sync semaphore */ + rdev->r600_blit.ring_size_common += 55; /* shaders + def state */ rdev->r600_blit.ring_size_common += 16; /* fence emit for VB IB */ rdev->r600_blit.ring_size_common += 5; /* done copy */ rdev->r600_blit.ring_size_common += 16; /* fence emit for done copy */ diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index e5279f9..a8d8c44 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -2371,15 +2371,16 @@ int r600_copy_blit(struct radeon_device *rdev, unsigned num_gpu_pages, struct radeon_fence **fence) { + struct radeon_semaphore *sem = NULL; struct radeon_sa_bo *vb = NULL; int r;
- r = r600_blit_prepare_copy(rdev, num_gpu_pages, &vb); + r = r600_blit_prepare_copy(rdev, num_gpu_pages, fence, &vb, &sem); if (r) { return r; } r600_kms_blit_copy(rdev, src_offset, dst_offset, num_gpu_pages, vb); - r600_blit_done_copy(rdev, fence, vb); + r600_blit_done_copy(rdev, fence, vb, sem); return 0; }
diff --git a/drivers/gpu/drm/radeon/r600_blit_kms.c b/drivers/gpu/drm/radeon/r600_blit_kms.c index 02f4eeb..2b8d641 100644 --- a/drivers/gpu/drm/radeon/r600_blit_kms.c +++ b/drivers/gpu/drm/radeon/r600_blit_kms.c @@ -512,7 +512,8 @@ int r600_blit_init(struct radeon_device *rdev) rdev->r600_blit.primitives.draw_auto = draw_auto; rdev->r600_blit.primitives.set_default_state = set_default_state;
- rdev->r600_blit.ring_size_common = 40; /* shaders + def state */ + rdev->r600_blit.ring_size_common = 8; /* sync semaphore */ + rdev->r600_blit.ring_size_common += 40; /* shaders + def state */ rdev->r600_blit.ring_size_common += 5; /* done copy */ rdev->r600_blit.ring_size_common += 16; /* fence emit for done copy */
@@ -666,7 +667,8 @@ static unsigned r600_blit_create_rect(unsigned num_gpu_pages,
int r600_blit_prepare_copy(struct radeon_device *rdev, unsigned num_gpu_pages, - struct radeon_sa_bo **vb) + struct radeon_fence **fence, struct radeon_sa_bo **vb, + struct radeon_semaphore **sem) { struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; int r; @@ -689,22 +691,37 @@ int r600_blit_prepare_copy(struct radeon_device *rdev, unsigned num_gpu_pages, return r; }
+ r = radeon_semaphore_create(rdev, sem); + if (r) { + radeon_sa_bo_free(rdev, vb, NULL); + return r; + } + /* calculate number of loops correctly */ ring_size = num_loops * dwords_per_loop; ring_size += rdev->r600_blit.ring_size_common; r = radeon_ring_lock(rdev, ring, ring_size); if (r) { radeon_sa_bo_free(rdev, vb, NULL); + radeon_semaphore_free(rdev, sem, NULL); return r; }
+ if (radeon_fence_need_sync(*fence, RADEON_RING_TYPE_GFX_INDEX)) { + radeon_semaphore_sync_rings(rdev, *sem, (*fence)->ring, + RADEON_RING_TYPE_GFX_INDEX); + radeon_fence_note_sync(*fence, RADEON_RING_TYPE_GFX_INDEX); + } else { + radeon_semaphore_free(rdev, sem, NULL); + } + rdev->r600_blit.primitives.set_default_state(rdev); rdev->r600_blit.primitives.set_shaders(rdev); return 0; }
void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence **fence, - struct radeon_sa_bo *vb) + struct radeon_sa_bo *vb, struct radeon_semaphore *sem) { struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; int r; @@ -717,6 +734,7 @@ void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence **fence
radeon_ring_unlock_commit(rdev, ring); radeon_sa_bo_free(rdev, &vb, *fence); + radeon_semaphore_free(rdev, &sem, *fence); }
void r600_kms_blit_copy(struct radeon_device *rdev, diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 4e232c3..aebaf28 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -465,10 +465,9 @@ void radeon_semaphore_emit_wait(struct radeon_device *rdev, int ring, struct radeon_semaphore *semaphore); int radeon_semaphore_sync_rings(struct radeon_device *rdev, struct radeon_semaphore *semaphore, - bool sync_to[RADEON_NUM_RINGS], - int dst_ring); + int signaler, int waiter); void radeon_semaphore_free(struct radeon_device *rdev, - struct radeon_semaphore *semaphore, + struct radeon_semaphore **semaphore, struct radeon_fence *fence);
/* @@ -648,6 +647,7 @@ struct radeon_ib { struct radeon_fence *fence; unsigned vm_id; bool is_const_ib; + struct radeon_fence *sync_to[RADEON_NUM_RINGS]; struct radeon_semaphore *semaphore; };
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index 8cdf075..94c427a 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -363,9 +363,10 @@ int r600_hdmi_buffer_status_changed(struct drm_encoder *encoder); void r600_hdmi_update_audio_settings(struct drm_encoder *encoder); /* r600 blit */ int r600_blit_prepare_copy(struct radeon_device *rdev, unsigned num_gpu_pages, - struct radeon_sa_bo **vb); + struct radeon_fence **fence, struct radeon_sa_bo **vb, + struct radeon_semaphore **sem); void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence **fence, - struct radeon_sa_bo *vb); + struct radeon_sa_bo *vb, struct radeon_semaphore *sem); void r600_kms_blit_copy(struct radeon_device *rdev, u64 src_gpu_addr, u64 dst_gpu_addr, unsigned num_gpu_pages, diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index c7d64a7..d295821 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -115,36 +115,20 @@ static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority return 0; }
-static int radeon_cs_sync_rings(struct radeon_cs_parser *p) +static void radeon_cs_sync_rings(struct radeon_cs_parser *p) { - bool sync_to_ring[RADEON_NUM_RINGS] = { }; - bool need_sync = false; - int i, r; + int i;
for (i = 0; i < p->nrelocs; i++) { - struct radeon_fence *fence; + struct radeon_fence *a, *b;
if (!p->relocs[i].robj || !p->relocs[i].robj->tbo.sync_obj) continue;
- fence = p->relocs[i].robj->tbo.sync_obj; - if (fence->ring != p->ring && !radeon_fence_signaled(fence)) { - sync_to_ring[fence->ring] = true; - need_sync = true; - } - } - - if (!need_sync) { - return 0; - } - - r = radeon_semaphore_create(p->rdev, &p->ib.semaphore); - if (r) { - return r; + a = p->relocs[i].robj->tbo.sync_obj; + b = p->ib.sync_to[a->ring]; + p->ib.sync_to[a->ring] = radeon_fence_later(a, b); } - - return radeon_semaphore_sync_rings(p->rdev, p->ib.semaphore, - sync_to_ring, p->ring); }
int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data) @@ -365,10 +349,7 @@ static int radeon_cs_ib_chunk(struct radeon_device *rdev, DRM_ERROR("Invalid command stream !\n"); return r; } - r = radeon_cs_sync_rings(parser); - if (r) { - DRM_ERROR("Failed to synchronize rings !\n"); - } + radeon_cs_sync_rings(parser); parser->ib.vm_id = 0; r = radeon_ib_schedule(rdev, &parser->ib); if (r) { @@ -465,10 +446,7 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev, if (r) { goto out; } - r = radeon_cs_sync_rings(parser); - if (r) { - DRM_ERROR("Failed to synchronize rings !\n"); - } + radeon_cs_sync_rings(parser);
if ((rdev->family >= CHIP_TAHITI) && (parser->chunk_const_ib_idx != -1)) { diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index a0b9970..87f66c5 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -67,7 +67,7 @@ u32 radeon_get_ib_value(struct radeon_cs_parser *p, int idx) int radeon_ib_get(struct radeon_device *rdev, int ring, struct radeon_ib *ib, unsigned size) { - int r; + int i, r;
r = radeon_sa_bo_new(rdev, &rdev->ring_tmp_bo, &ib->sa_bo, size, 256, true); if (r) { @@ -75,20 +75,26 @@ int radeon_ib_get(struct radeon_device *rdev, int ring, return r; }
+ r = radeon_semaphore_create(rdev, &ib->semaphore); + if (r) { + return r; + } + ib->ring = ring; ib->fence = NULL; ib->ptr = radeon_sa_bo_cpu_addr(ib->sa_bo); ib->gpu_addr = radeon_sa_bo_gpu_addr(ib->sa_bo); ib->vm_id = 0; ib->is_const_ib = false; - ib->semaphore = NULL; + for (i = 0; i < RADEON_NUM_RINGS; ++i) + ib->sync_to[i] = NULL;
return 0; }
void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib *ib) { - radeon_semaphore_free(rdev, ib->semaphore, ib->fence); + radeon_semaphore_free(rdev, &ib->semaphore, ib->fence); radeon_sa_bo_free(rdev, &ib->sa_bo, ib->fence); radeon_fence_unref(&ib->fence); } @@ -96,7 +102,8 @@ void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib *ib) int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib) { struct radeon_ring *ring = &rdev->ring[ib->ring]; - int r = 0; + bool need_sync = false; + int i, r = 0;
if (!ib->length_dw || !ring->ready) { /* TODO: Nothings in the ib we should report. */ @@ -105,11 +112,24 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib) }
/* 64 dwords should be enough for fence too */ - r = radeon_ring_lock(rdev, ring, 64); + r = radeon_ring_lock(rdev, ring, 64 + RADEON_NUM_RINGS * 8); if (r) { dev_err(rdev->dev, "scheduling IB failed (%d).\n", r); return r; } + for (i = 0; i < RADEON_NUM_RINGS; ++i) { + struct radeon_fence *fence = ib->sync_to[i]; + if (radeon_fence_need_sync(fence, ib->ring)) { + need_sync = true; + radeon_semaphore_sync_rings(rdev, ib->semaphore, + fence->ring, ib->ring); + radeon_fence_note_sync(fence, ib->ring); + } + } + /* immediately free semaphore when we don't need to sync */ + if (!need_sync) { + radeon_semaphore_free(rdev, &ib->semaphore, NULL); + } radeon_ring_ib_execute(rdev, ib->ring, ib); r = radeon_fence_emit(rdev, &ib->fence, ib->ring); if (r) { diff --git a/drivers/gpu/drm/radeon/radeon_semaphore.c b/drivers/gpu/drm/radeon/radeon_semaphore.c index e2ace5d..7cc78de 100644 --- a/drivers/gpu/drm/radeon/radeon_semaphore.c +++ b/drivers/gpu/drm/radeon/radeon_semaphore.c @@ -68,70 +68,49 @@ void radeon_semaphore_emit_wait(struct radeon_device *rdev, int ring, radeon_semaphore_ring_emit(rdev, ring, &rdev->ring[ring], semaphore, true); }
+/* caller must hold ring lock */ int radeon_semaphore_sync_rings(struct radeon_device *rdev, struct radeon_semaphore *semaphore, - bool sync_to[RADEON_NUM_RINGS], - int dst_ring) + int signaler, int waiter) { - int i = 0, r; + int r;
- mutex_lock(&rdev->ring_lock); - r = radeon_ring_alloc(rdev, &rdev->ring[dst_ring], RADEON_NUM_RINGS * 8); - if (r) { - goto error; + /* no need to signal and wait on the same ring */ + if (signaler == waiter) { + return 0; }
- for (i = 0; i < RADEON_NUM_RINGS; ++i) { - /* no need to sync to our own or unused rings */ - if (!sync_to[i] || i == dst_ring) - continue; - - /* prevent GPU deadlocks */ - if (!rdev->ring[i].ready) { - dev_err(rdev->dev, "Trying to sync to a disabled ring!"); - r = -EINVAL; - goto error; - } - - r = radeon_ring_alloc(rdev, &rdev->ring[i], 8); - if (r) { - goto error; - } - - radeon_semaphore_emit_signal(rdev, i, semaphore); - radeon_semaphore_emit_wait(rdev, dst_ring, semaphore); + /* prevent GPU deadlocks */ + if (!rdev->ring[signaler].ready) { + dev_err(rdev->dev, "Trying to sync to a disabled ring!"); + return -EINVAL; + }
- radeon_ring_commit(rdev, &rdev->ring[i]); + r = radeon_ring_alloc(rdev, &rdev->ring[signaler], 8); + if (r) { + return r; } + radeon_semaphore_emit_signal(rdev, signaler, semaphore); + radeon_ring_commit(rdev, &rdev->ring[signaler]);
- radeon_ring_commit(rdev, &rdev->ring[dst_ring]); - mutex_unlock(&rdev->ring_lock); + /* we assume caller has already allocated space on waiters ring */ + radeon_semaphore_emit_wait(rdev, waiter, semaphore);
return 0; - -error: - /* unlock all locks taken so far */ - for (--i; i >= 0; --i) { - if (sync_to[i] || i == dst_ring) { - radeon_ring_undo(&rdev->ring[i]); - } - } - radeon_ring_undo(&rdev->ring[dst_ring]); - mutex_unlock(&rdev->ring_lock); - return r; }
void radeon_semaphore_free(struct radeon_device *rdev, - struct radeon_semaphore *semaphore, + struct radeon_semaphore **semaphore, struct radeon_fence *fence) { - if (semaphore == NULL) { + if (semaphore == NULL || *semaphore == NULL) { return; } - if (semaphore->waiters > 0) { + if ((*semaphore)->waiters > 0) { dev_err(rdev->dev, "semaphore %p has more waiters than signalers," - " hardware lockup imminent!\n", semaphore); + " hardware lockup imminent!\n", *semaphore); } - radeon_sa_bo_free(rdev, &semaphore->sa_bo, fence); - kfree(semaphore); + radeon_sa_bo_free(rdev, &(*semaphore)->sa_bo, fence); + kfree(*semaphore); + *semaphore = NULL; } diff --git a/drivers/gpu/drm/radeon/radeon_test.c b/drivers/gpu/drm/radeon/radeon_test.c index 47e1535..a94f66f 100644 --- a/drivers/gpu/drm/radeon/radeon_test.c +++ b/drivers/gpu/drm/radeon/radeon_test.c @@ -303,8 +303,7 @@ void radeon_test_ring_sync(struct radeon_device *rdev, }
out_cleanup: - if (semaphore) - radeon_semaphore_free(rdev, semaphore, NULL); + radeon_semaphore_free(rdev, &semaphore, NULL);
if (fence1) radeon_fence_unref(&fence1); @@ -422,8 +421,7 @@ void radeon_test_ring_sync2(struct radeon_device *rdev, }
out_cleanup: - if (semaphore) - radeon_semaphore_free(rdev, semaphore, NULL); + radeon_semaphore_free(rdev, &semaphore, NULL);
if (fenceA) radeon_fence_unref(&fenceA); diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index 2d36bdd..c43035c 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -223,7 +223,6 @@ static int radeon_move_blit(struct ttm_buffer_object *bo, struct radeon_device *rdev; uint64_t old_start, new_start; struct radeon_fence *fence; - struct radeon_semaphore *sem = NULL; int r, ridx;
rdev = radeon_get_rdev(bo->bdev); @@ -262,31 +261,12 @@ static int radeon_move_blit(struct ttm_buffer_object *bo,
/* sync other rings */ fence = bo->sync_obj; - if (fence && fence->ring != ridx - && !radeon_fence_signaled(fence)) { - bool sync_to_ring[RADEON_NUM_RINGS] = { }; - sync_to_ring[fence->ring] = true; - - r = radeon_semaphore_create(rdev, &sem); - if (r) { - return r; - } - - r = radeon_semaphore_sync_rings(rdev, sem, sync_to_ring, ridx); - if (r) { - radeon_semaphore_free(rdev, sem, NULL); - return r; - } - } - - fence = NULL; r = radeon_copy(rdev, old_start, new_start, new_mem->num_pages * (PAGE_SIZE / RADEON_GPU_PAGE_SIZE), /* GPU pages */ &fence); /* FIXME: handle copy error */ r = ttm_bo_move_accel_cleanup(bo, (void *)fence, NULL, evict, no_wait_reserve, no_wait_gpu, new_mem); - radeon_semaphore_free(rdev, sem, fence); radeon_fence_unref(&fence); return r; }
The vram_mutex is now an rw_semaphore and is only write-locked while changing the memory clock. The lock is also renamed to better reflect what it is protecting.
v2: Keep the ttm_vm_ops on IGPs
Signed-off-by: Christian König deathsimple@vodafone.de --- drivers/gpu/drm/radeon/radeon.h | 3 ++- drivers/gpu/drm/radeon/radeon_device.c | 2 +- drivers/gpu/drm/radeon/radeon_object.c | 8 ++++---- drivers/gpu/drm/radeon/radeon_pm.c | 4 ++-- drivers/gpu/drm/radeon/radeon_ttm.c | 4 ++-- 5 files changed, 11 insertions(+), 10 deletions(-)
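The resulting locking scheme, sketched: paths that must not race with a memory-clock change (BO creation, unref, page faults) take the semaphore shared and can run concurrently, while only the power-management code takes it exclusively:

/* readers, e.g. radeon_bo_create / radeon_bo_unref / radeon_ttm_fault */
down_read(&rdev->pm.mclk_lock);
r = ttm_bo_init(&rdev->mman.bdev, &bo->tbo, /* ... */);
up_read(&rdev->pm.mclk_lock);

/* writer: radeon_pm_set_clocks, exclusive while mclk is reprogrammed */
down_write(&rdev->pm.mclk_lock);
/* ... reprogram the memory clock ... */
up_write(&rdev->pm.mclk_lock);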
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index aebaf28..3e83480 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -1055,6 +1055,8 @@ struct radeon_power_state {
struct radeon_pm { struct mutex mutex; + /* write locked while reprogramming mclk */ + struct rw_semaphore mclk_lock; u32 active_crtcs; int active_crtc_count; int req_vblank; @@ -1551,7 +1553,6 @@ struct radeon_device { struct work_struct audio_work; int num_crtc; /* number of crtcs */ struct mutex dc_hw_i2c_mutex; /* display controller hw i2c mutex */ - struct mutex vram_mutex; bool audio_enabled; struct r600_audio audio_status; /* audio stuff */ struct notifier_block acpi_nb; diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 066c98b..7667184 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -735,7 +735,7 @@ int radeon_device_init(struct radeon_device *rdev, spin_lock_init(&rdev->ih.lock); mutex_init(&rdev->gem.mutex); mutex_init(&rdev->pm.mutex); - mutex_init(&rdev->vram_mutex); + init_rwsem(&rdev->pm.mclk_lock); init_waitqueue_head(&rdev->irq.vblank_queue); init_waitqueue_head(&rdev->irq.idle_queue); r = radeon_gem_init(rdev); diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index 830f1a7..6ecb200 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -154,11 +154,11 @@ retry: INIT_LIST_HEAD(&bo->va); radeon_ttm_placement_from_domain(bo, domain); /* Kernel allocation are uninterruptible */ - mutex_lock(&rdev->vram_mutex); + down_read(&rdev->pm.mclk_lock); r = ttm_bo_init(&rdev->mman.bdev, &bo->tbo, size, type, &bo->placement, page_align, 0, !kernel, NULL, acc_size, sg, &radeon_ttm_bo_destroy); - mutex_unlock(&rdev->vram_mutex); + up_read(&rdev->pm.mclk_lock); if (unlikely(r != 0)) { if (r != -ERESTARTSYS) { if (domain == RADEON_GEM_DOMAIN_VRAM) { @@ -219,9 +219,9 @@ void radeon_bo_unref(struct radeon_bo **bo) return; rdev = (*bo)->rdev; tbo = &((*bo)->tbo); - mutex_lock(&rdev->vram_mutex); + down_read(&rdev->pm.mclk_lock); ttm_bo_unref(&tbo); - mutex_unlock(&rdev->vram_mutex); + up_read(&rdev->pm.mclk_lock); if (tbo == NULL) *bo = NULL; } diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c index 0882554..d13b6ae 100644 --- a/drivers/gpu/drm/radeon/radeon_pm.c +++ b/drivers/gpu/drm/radeon/radeon_pm.c @@ -251,7 +251,7 @@ static void radeon_pm_set_clocks(struct radeon_device *rdev) return;
mutex_lock(&rdev->ddev->struct_mutex); - mutex_lock(&rdev->vram_mutex); + down_write(&rdev->pm.mclk_lock); mutex_lock(&rdev->ring_lock);
/* gui idle int has issues on older chips it seems */ @@ -303,7 +303,7 @@ static void radeon_pm_set_clocks(struct radeon_device *rdev) rdev->pm.dynpm_planned_action = DYNPM_ACTION_NONE;
mutex_unlock(&rdev->ring_lock); - mutex_unlock(&rdev->vram_mutex); + up_write(&rdev->pm.mclk_lock); mutex_unlock(&rdev->ddev->struct_mutex); }
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index c43035c..0881131 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -797,9 +797,9 @@ static int radeon_ttm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) return VM_FAULT_NOPAGE; } rdev = radeon_get_rdev(bo->bdev); - mutex_lock(&rdev->vram_mutex); + down_read(&rdev->pm.mclk_lock); r = ttm_vm_ops->fault(vma, vmf); - mutex_unlock(&rdev->vram_mutex); + up_read(&rdev->pm.mclk_lock); return r; }
From: Christian Koenig <christian.koenig@amd.com>
Signed-off-by: Christian Koenig <christian.koenig@amd.com> --- drivers/gpu/drm/radeon/radeon.h | 2 -- 1 file changed, 2 deletions(-)
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 3e83480..618df9a 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -733,9 +733,7 @@ struct r600_ih { struct radeon_bo *ring_obj; volatile uint32_t *ring; unsigned rptr; - unsigned rptr_offs; unsigned wptr; - unsigned wptr_old; unsigned ring_size; uint64_t gpu_addr; uint32_t ptr_mask;
From: Christian Koenig <christian.koenig@amd.com>
The spinlock was actually there to protect the rptr, but rptr was read outside of the locked area.
v2: Keep the spinlock; it looks like there is still something in the IH that doesn't like irqs enabled.
Signed-off-by: Christian Koenig christian.koenig@amd.com --- drivers/gpu/drm/radeon/evergreen.c | 5 ++--- drivers/gpu/drm/radeon/r600.c | 7 ++----- drivers/gpu/drm/radeon/radeon.h | 1 - drivers/gpu/drm/radeon/si.c | 6 ++---- 4 files changed, 6 insertions(+), 13 deletions(-)
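The handler shape after this change, sketched for the evergreen variant: wptr is now read under the lock, and the restart test compares against the local rptr instead of a stored ih.wptr:

spin_lock_irqsave(&rdev->ih.lock, flags);
wptr = evergreen_get_ih_wptr(rdev);
rptr = rdev->ih.rptr;
if (rptr == wptr) {
        spin_unlock_irqrestore(&rdev->ih.lock, flags);
        return IRQ_NONE;
}
restart_ih:
while (rptr != wptr) {
        /* ... decode one interrupt vector, advance rptr ... */
}
/* make sure wptr hasn't changed while processing */
wptr = evergreen_get_ih_wptr(rdev);
if (wptr != rptr)
        goto restart_ih;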
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index dd3cea4..e4c3321 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -2950,11 +2950,11 @@ int evergreen_irq_process(struct radeon_device *rdev) if (!rdev->ih.enabled || rdev->shutdown) return IRQ_NONE;
+ spin_lock_irqsave(&rdev->ih.lock, flags); wptr = evergreen_get_ih_wptr(rdev); rptr = rdev->ih.rptr; DRM_DEBUG("r600_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
- spin_lock_irqsave(&rdev->ih.lock, flags); if (rptr == wptr) { spin_unlock_irqrestore(&rdev->ih.lock, flags); return IRQ_NONE; @@ -2966,7 +2966,6 @@ restart_ih: /* display interrupts */ evergreen_irq_ack(rdev);
- rdev->ih.wptr = wptr; while (rptr != wptr) { /* wptr/rptr are in bytes! */ ring_index = rptr / 4; @@ -3267,7 +3266,7 @@ restart_ih: } /* make sure wptr hasn't changed while processing */ wptr = evergreen_get_ih_wptr(rdev); - if (wptr != rdev->ih.wptr) + if (wptr != rptr) goto restart_ih; if (queue_hotplug) schedule_work(&rdev->hotplug_work); diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index a8d8c44..e1861ac 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -2921,7 +2921,6 @@ void r600_disable_interrupts(struct radeon_device *rdev) WREG32(IH_RB_RPTR, 0); WREG32(IH_RB_WPTR, 0); rdev->ih.enabled = false; - rdev->ih.wptr = 0; rdev->ih.rptr = 0; }
@@ -3384,12 +3383,11 @@ int r600_irq_process(struct radeon_device *rdev) if (!rdev->msi_enabled) RREG32(IH_RB_WPTR);
+ spin_lock_irqsave(&rdev->ih.lock, flags); wptr = r600_get_ih_wptr(rdev); rptr = rdev->ih.rptr; DRM_DEBUG("r600_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
- spin_lock_irqsave(&rdev->ih.lock, flags); - if (rptr == wptr) { spin_unlock_irqrestore(&rdev->ih.lock, flags); return IRQ_NONE; @@ -3402,7 +3400,6 @@ restart_ih: /* display interrupts */ r600_irq_ack(rdev);
- rdev->ih.wptr = wptr; while (rptr != wptr) { /* wptr/rptr are in bytes! */ ring_index = rptr / 4; @@ -3558,7 +3555,7 @@ restart_ih: } /* make sure wptr hasn't changed while processing */ wptr = r600_get_ih_wptr(rdev); - if (wptr != rdev->ih.wptr) + if (wptr != rptr) goto restart_ih; if (queue_hotplug) schedule_work(&rdev->hotplug_work); diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 618df9a..378d43b 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -733,7 +733,6 @@ struct r600_ih { struct radeon_bo *ring_obj; volatile uint32_t *ring; unsigned rptr; - unsigned wptr; unsigned ring_size; uint64_t gpu_addr; uint32_t ptr_mask; diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index 5ca8ef5..93da01c 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -3095,7 +3095,6 @@ static void si_disable_interrupts(struct radeon_device *rdev) WREG32(IH_RB_RPTR, 0); WREG32(IH_RB_WPTR, 0); rdev->ih.enabled = false; - rdev->ih.wptr = 0; rdev->ih.rptr = 0; }
@@ -3518,11 +3517,11 @@ int si_irq_process(struct radeon_device *rdev) if (!rdev->ih.enabled || rdev->shutdown) return IRQ_NONE;
+ spin_lock_irqsave(&rdev->ih.lock, flags); wptr = si_get_ih_wptr(rdev); rptr = rdev->ih.rptr; DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
- spin_lock_irqsave(&rdev->ih.lock, flags); if (rptr == wptr) { spin_unlock_irqrestore(&rdev->ih.lock, flags); return IRQ_NONE; @@ -3534,7 +3533,6 @@ restart_ih: /* display interrupts */ si_irq_ack(rdev);
- rdev->ih.wptr = wptr; while (rptr != wptr) { /* wptr/rptr are in bytes! */ ring_index = rptr / 4; @@ -3787,7 +3785,7 @@ restart_ih: } /* make sure wptr hasn't changed while processing */ wptr = si_get_ih_wptr(rdev); - if (wptr != rdev->ih.wptr) + if (wptr != rptr) goto restart_ih; if (queue_hotplug) schedule_work(&rdev->hotplug_work);
From: Christian Koenig christian.koenig@amd.com
1. It is really dangerous to have more than one spinlock protecting the same information.
2. radeon_irq_set sometimes wasn't called with lock protection, so more than one CPU could tamper with the irq regs at the same time.
3. The pm.gui_idle variable assumed that the 3D engine didn't become idle between testing the register and setting the variable. So just remove it and test the register directly.
v2: Also handle the hpd irq code the same way.
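Both forms appear in the diff below; reduced to its core, the gui idle wait changes from polling a cached flag to re-testing the hardware on every wakeup (a simplified sketch, not the full driver code):

/* old, racy: the waiter checks a flag the irq handler set earlier */
wait_event_interruptible_timeout(rdev->irq.idle_queue, rdev->pm.gui_idle,
                                 msecs_to_jiffies(RADEON_WAIT_IDLE_TIMEOUT));

/* new: radeon_gui_idle() reads the status register directly */
wait_event_timeout(rdev->irq.idle_queue, radeon_gui_idle(rdev),
                   msecs_to_jiffies(RADEON_WAIT_IDLE_TIMEOUT));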
Signed-off-by: Christian Koenig christian.koenig@amd.com --- drivers/gpu/drm/radeon/evergreen.c | 21 ++----- drivers/gpu/drm/radeon/r100.c | 29 ++-------- drivers/gpu/drm/radeon/r600.c | 38 ++++-------- drivers/gpu/drm/radeon/r600_hdmi.c | 6 +- drivers/gpu/drm/radeon/radeon.h | 35 +++++------ drivers/gpu/drm/radeon/radeon_irq_kms.c | 96 +++++++++++++++++++++++++++---- drivers/gpu/drm/radeon/radeon_kms.c | 12 +++- drivers/gpu/drm/radeon/radeon_pm.c | 12 +--- drivers/gpu/drm/radeon/rs600.c | 13 ++--- drivers/gpu/drm/radeon/si.c | 1 - 10 files changed, 144 insertions(+), 119 deletions(-)
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index e4c3321..48ec1a0 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -428,6 +428,7 @@ void evergreen_hpd_init(struct radeon_device *rdev) { struct drm_device *dev = rdev->ddev; struct drm_connector *connector; + unsigned enabled = 0; u32 tmp = DC_HPDx_CONNECTION_TIMER(0x9c4) | DC_HPDx_RX_INT_TIMER(0xfa) | DC_HPDx_EN;
@@ -436,73 +437,64 @@ void evergreen_hpd_init(struct radeon_device *rdev) switch (radeon_connector->hpd.hpd) { case RADEON_HPD_1: WREG32(DC_HPD1_CONTROL, tmp); - rdev->irq.hpd[0] = true; break; case RADEON_HPD_2: WREG32(DC_HPD2_CONTROL, tmp); - rdev->irq.hpd[1] = true; break; case RADEON_HPD_3: WREG32(DC_HPD3_CONTROL, tmp); - rdev->irq.hpd[2] = true; break; case RADEON_HPD_4: WREG32(DC_HPD4_CONTROL, tmp); - rdev->irq.hpd[3] = true; break; case RADEON_HPD_5: WREG32(DC_HPD5_CONTROL, tmp); - rdev->irq.hpd[4] = true; break; case RADEON_HPD_6: WREG32(DC_HPD6_CONTROL, tmp); - rdev->irq.hpd[5] = true; break; default: break; } radeon_hpd_set_polarity(rdev, radeon_connector->hpd.hpd); + enabled |= 1 << radeon_connector->hpd.hpd; } - if (rdev->irq.installed) - evergreen_irq_set(rdev); + radeon_irq_kms_enable_hpd(rdev, enabled); }
void evergreen_hpd_fini(struct radeon_device *rdev) { struct drm_device *dev = rdev->ddev; struct drm_connector *connector; + unsigned disabled = 0;
list_for_each_entry(connector, &dev->mode_config.connector_list, head) { struct radeon_connector *radeon_connector = to_radeon_connector(connector); switch (radeon_connector->hpd.hpd) { case RADEON_HPD_1: WREG32(DC_HPD1_CONTROL, 0); - rdev->irq.hpd[0] = false; break; case RADEON_HPD_2: WREG32(DC_HPD2_CONTROL, 0); - rdev->irq.hpd[1] = false; break; case RADEON_HPD_3: WREG32(DC_HPD3_CONTROL, 0); - rdev->irq.hpd[2] = false; break; case RADEON_HPD_4: WREG32(DC_HPD4_CONTROL, 0); - rdev->irq.hpd[3] = false; break; case RADEON_HPD_5: WREG32(DC_HPD5_CONTROL, 0); - rdev->irq.hpd[4] = false; break; case RADEON_HPD_6: WREG32(DC_HPD6_CONTROL, 0); - rdev->irq.hpd[5] = false; break; default: break; } + disabled |= 1 << radeon_connector->hpd.hpd; } + radeon_irq_kms_disable_hpd(rdev, disabled); }
/* watermark setup */ @@ -3252,7 +3244,6 @@ restart_ih: break; case 233: /* GUI IDLE */ DRM_DEBUG("IH: GUI idle\n"); - rdev->pm.gui_idle = true; wake_up(&rdev->irq.idle_queue); break; default: diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index 415b7d8..e8fe4ae 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -567,43 +567,27 @@ void r100_hpd_init(struct radeon_device *rdev) { struct drm_device *dev = rdev->ddev; struct drm_connector *connector; + unsigned enable = 0;
list_for_each_entry(connector, &dev->mode_config.connector_list, head) { struct radeon_connector *radeon_connector = to_radeon_connector(connector); - switch (radeon_connector->hpd.hpd) { - case RADEON_HPD_1: - rdev->irq.hpd[0] = true; - break; - case RADEON_HPD_2: - rdev->irq.hpd[1] = true; - break; - default: - break; - } + enable |= 1 << radeon_connector->hpd.hpd; radeon_hpd_set_polarity(rdev, radeon_connector->hpd.hpd); } - if (rdev->irq.installed) - r100_irq_set(rdev); + radeon_irq_kms_enable_hpd(rdev, enable); }
void r100_hpd_fini(struct radeon_device *rdev) { struct drm_device *dev = rdev->ddev; struct drm_connector *connector; + unsigned disable = 0;
list_for_each_entry(connector, &dev->mode_config.connector_list, head) { struct radeon_connector *radeon_connector = to_radeon_connector(connector); - switch (radeon_connector->hpd.hpd) { - case RADEON_HPD_1: - rdev->irq.hpd[0] = false; - break; - case RADEON_HPD_2: - rdev->irq.hpd[1] = false; - break; - default: - break; - } + disable |= 1 << radeon_connector->hpd.hpd; } + radeon_irq_kms_disable_hpd(rdev, disable); }
/* @@ -782,7 +766,6 @@ int r100_irq_process(struct radeon_device *rdev) /* gui idle interrupt */ if (status & RADEON_GUI_IDLE_STAT) { rdev->irq.gui_idle_acked = true; - rdev->pm.gui_idle = true; wake_up(&rdev->irq.idle_queue); } /* Vertical blank interrupts */ diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index e1861ac..72b5084 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -709,6 +709,7 @@ void r600_hpd_init(struct radeon_device *rdev) { struct drm_device *dev = rdev->ddev; struct drm_connector *connector; + unsigned enable = 0;
list_for_each_entry(connector, &dev->mode_config.connector_list, head) { struct radeon_connector *radeon_connector = to_radeon_connector(connector); @@ -729,28 +730,22 @@ void r600_hpd_init(struct radeon_device *rdev) switch (radeon_connector->hpd.hpd) { case RADEON_HPD_1: WREG32(DC_HPD1_CONTROL, tmp); - rdev->irq.hpd[0] = true; break; case RADEON_HPD_2: WREG32(DC_HPD2_CONTROL, tmp); - rdev->irq.hpd[1] = true; break; case RADEON_HPD_3: WREG32(DC_HPD3_CONTROL, tmp); - rdev->irq.hpd[2] = true; break; case RADEON_HPD_4: WREG32(DC_HPD4_CONTROL, tmp); - rdev->irq.hpd[3] = true; break; /* DCE 3.2 */ case RADEON_HPD_5: WREG32(DC_HPD5_CONTROL, tmp); - rdev->irq.hpd[4] = true; break; case RADEON_HPD_6: WREG32(DC_HPD6_CONTROL, tmp); - rdev->irq.hpd[5] = true; break; default: break; @@ -759,85 +754,73 @@ void r600_hpd_init(struct radeon_device *rdev) switch (radeon_connector->hpd.hpd) { case RADEON_HPD_1: WREG32(DC_HOT_PLUG_DETECT1_CONTROL, DC_HOT_PLUG_DETECTx_EN); - rdev->irq.hpd[0] = true; break; case RADEON_HPD_2: WREG32(DC_HOT_PLUG_DETECT2_CONTROL, DC_HOT_PLUG_DETECTx_EN); - rdev->irq.hpd[1] = true; break; case RADEON_HPD_3: WREG32(DC_HOT_PLUG_DETECT3_CONTROL, DC_HOT_PLUG_DETECTx_EN); - rdev->irq.hpd[2] = true; break; default: break; } } + enable |= 1 << radeon_connector->hpd.hpd; radeon_hpd_set_polarity(rdev, radeon_connector->hpd.hpd); } - if (rdev->irq.installed) - r600_irq_set(rdev); + radeon_irq_kms_enable_hpd(rdev, enable); }
void r600_hpd_fini(struct radeon_device *rdev) { struct drm_device *dev = rdev->ddev; struct drm_connector *connector; + unsigned disable = 0;
- if (ASIC_IS_DCE3(rdev)) { - list_for_each_entry(connector, &dev->mode_config.connector_list, head) { - struct radeon_connector *radeon_connector = to_radeon_connector(connector); + list_for_each_entry(connector, &dev->mode_config.connector_list, head) { + struct radeon_connector *radeon_connector = to_radeon_connector(connector); + if (ASIC_IS_DCE3(rdev)) { switch (radeon_connector->hpd.hpd) { case RADEON_HPD_1: WREG32(DC_HPD1_CONTROL, 0); - rdev->irq.hpd[0] = false; break; case RADEON_HPD_2: WREG32(DC_HPD2_CONTROL, 0); - rdev->irq.hpd[1] = false; break; case RADEON_HPD_3: WREG32(DC_HPD3_CONTROL, 0); - rdev->irq.hpd[2] = false; break; case RADEON_HPD_4: WREG32(DC_HPD4_CONTROL, 0); - rdev->irq.hpd[3] = false; break; /* DCE 3.2 */ case RADEON_HPD_5: WREG32(DC_HPD5_CONTROL, 0); - rdev->irq.hpd[4] = false; break; case RADEON_HPD_6: WREG32(DC_HPD6_CONTROL, 0); - rdev->irq.hpd[5] = false; break; default: break; } - } - } else { - list_for_each_entry(connector, &dev->mode_config.connector_list, head) { - struct radeon_connector *radeon_connector = to_radeon_connector(connector); + } else { switch (radeon_connector->hpd.hpd) { case RADEON_HPD_1: WREG32(DC_HOT_PLUG_DETECT1_CONTROL, 0); - rdev->irq.hpd[0] = false; break; case RADEON_HPD_2: WREG32(DC_HOT_PLUG_DETECT2_CONTROL, 0); - rdev->irq.hpd[1] = false; break; case RADEON_HPD_3: WREG32(DC_HOT_PLUG_DETECT3_CONTROL, 0); - rdev->irq.hpd[2] = false; break; default: break; } } + disable |= 1 << radeon_connector->hpd.hpd; } + radeon_irq_kms_disable_hpd(rdev, disable); }
/* @@ -3541,7 +3524,6 @@ restart_ih: break; case 233: /* GUI IDLE */ DRM_DEBUG("IH: GUI idle\n"); - rdev->pm.gui_idle = true; wake_up(&rdev->irq.idle_queue); break; default: diff --git a/drivers/gpu/drm/radeon/r600_hdmi.c b/drivers/gpu/drm/radeon/r600_hdmi.c index 226379e..b76c0f2 100644 --- a/drivers/gpu/drm/radeon/r600_hdmi.c +++ b/drivers/gpu/drm/radeon/r600_hdmi.c @@ -523,8 +523,7 @@ void r600_hdmi_enable(struct drm_encoder *encoder)
if (rdev->irq.installed) { /* if irq is available use it */ - rdev->irq.afmt[dig->afmt->id] = true; - radeon_irq_set(rdev); + radeon_irq_kms_enable_afmt(rdev, dig->afmt->id); }
dig->afmt->enabled = true; @@ -560,8 +559,7 @@ void r600_hdmi_disable(struct drm_encoder *encoder) offset, radeon_encoder->encoder_id);
/* disable irq */ - rdev->irq.afmt[dig->afmt->id] = false; - radeon_irq_set(rdev); + radeon_irq_kms_disable_afmt(rdev, dig->afmt->id);
/* Older chipsets not handled by AtomBIOS */ if (rdev->family >= CHIP_R600 && !ASIC_IS_DCE3(rdev)) { diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 378d43b..8a2a1f4 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -610,21 +610,20 @@ union radeon_irq_stat_regs { #define RADEON_MAX_AFMT_BLOCKS 6
struct radeon_irq { - bool installed; - bool sw_int[RADEON_NUM_RINGS]; - bool crtc_vblank_int[RADEON_MAX_CRTCS]; - bool pflip[RADEON_MAX_CRTCS]; - wait_queue_head_t vblank_queue; - bool hpd[RADEON_MAX_HPD_PINS]; - bool gui_idle; - bool gui_idle_acked; - wait_queue_head_t idle_queue; - bool afmt[RADEON_MAX_AFMT_BLOCKS]; - spinlock_t sw_lock; - int sw_refcount[RADEON_NUM_RINGS]; - union radeon_irq_stat_regs stat_regs; - spinlock_t pflip_lock[RADEON_MAX_CRTCS]; - int pflip_refcount[RADEON_MAX_CRTCS]; + bool installed; + spinlock_t lock; + bool sw_int[RADEON_NUM_RINGS]; + int sw_refcount[RADEON_NUM_RINGS]; + bool crtc_vblank_int[RADEON_MAX_CRTCS]; + bool pflip[RADEON_MAX_CRTCS]; + int pflip_refcount[RADEON_MAX_CRTCS]; + wait_queue_head_t vblank_queue; + bool hpd[RADEON_MAX_HPD_PINS]; + bool gui_idle; + bool gui_idle_acked; + wait_queue_head_t idle_queue; + bool afmt[RADEON_MAX_AFMT_BLOCKS]; + union radeon_irq_stat_regs stat_regs; };
int radeon_irq_kms_init(struct radeon_device *rdev); @@ -633,6 +632,11 @@ void radeon_irq_kms_sw_irq_get(struct radeon_device *rdev, int ring); void radeon_irq_kms_sw_irq_put(struct radeon_device *rdev, int ring); void radeon_irq_kms_pflip_irq_get(struct radeon_device *rdev, int crtc); void radeon_irq_kms_pflip_irq_put(struct radeon_device *rdev, int crtc); +void radeon_irq_kms_enable_afmt(struct radeon_device *rdev, int block); +void radeon_irq_kms_disable_afmt(struct radeon_device *rdev, int block); +void radeon_irq_kms_enable_hpd(struct radeon_device *rdev, unsigned crtcs); +void radeon_irq_kms_disable_hpd(struct radeon_device *rdev, unsigned crtcs); +int radeon_irq_kms_wait_gui_idle(struct radeon_device *rdev);
/* * CP & rings. @@ -1058,7 +1062,6 @@ struct radeon_pm { int active_crtc_count; int req_vblank; bool vblank_sync; - bool gui_idle; fixed20_12 max_bandwidth; fixed20_12 igp_sideport_mclk; fixed20_12 igp_system_mclk; diff --git a/drivers/gpu/drm/radeon/radeon_irq_kms.c b/drivers/gpu/drm/radeon/radeon_irq_kms.c index 5df58d1..73cd0fd 100644 --- a/drivers/gpu/drm/radeon/radeon_irq_kms.c +++ b/drivers/gpu/drm/radeon/radeon_irq_kms.c @@ -32,6 +32,8 @@ #include "radeon.h" #include "atom.h"
+#define RADEON_WAIT_IDLE_TIMEOUT 200 + irqreturn_t radeon_driver_irq_handler_kms(DRM_IRQ_ARGS) { struct drm_device *dev = (struct drm_device *) arg; @@ -62,8 +64,10 @@ static void radeon_hotplug_work_func(struct work_struct *work) void radeon_driver_irq_preinstall_kms(struct drm_device *dev) { struct radeon_device *rdev = dev->dev_private; + unsigned long irqflags; unsigned i;
+ spin_lock_irqsave(&rdev->irq.lock, irqflags); /* Disable *all* interrupts */ for (i = 0; i < RADEON_NUM_RINGS; i++) rdev->irq.sw_int[i] = false; @@ -76,6 +80,7 @@ void radeon_driver_irq_preinstall_kms(struct drm_device *dev) rdev->irq.afmt[i] = false; } radeon_irq_set(rdev); + spin_unlock_irqrestore(&rdev->irq.lock, irqflags); /* Clear bits */ radeon_irq_process(rdev); } @@ -83,23 +88,28 @@ void radeon_driver_irq_preinstall_kms(struct drm_device *dev) int radeon_driver_irq_postinstall_kms(struct drm_device *dev) { struct radeon_device *rdev = dev->dev_private; + unsigned long irqflags; unsigned i;
dev->max_vblank_count = 0x001fffff; + spin_lock_irqsave(&rdev->irq.lock, irqflags); for (i = 0; i < RADEON_NUM_RINGS; i++) rdev->irq.sw_int[i] = true; radeon_irq_set(rdev); + spin_unlock_irqrestore(&rdev->irq.lock, irqflags); return 0; }
void radeon_driver_irq_uninstall_kms(struct drm_device *dev) { struct radeon_device *rdev = dev->dev_private; + unsigned long irqflags; unsigned i;
if (rdev == NULL) { return; } + spin_lock_irqsave(&rdev->irq.lock, irqflags); /* Disable *all* interrupts */ for (i = 0; i < RADEON_NUM_RINGS; i++) rdev->irq.sw_int[i] = false; @@ -112,6 +122,7 @@ void radeon_driver_irq_uninstall_kms(struct drm_device *dev) rdev->irq.afmt[i] = false; } radeon_irq_set(rdev); + spin_unlock_irqrestore(&rdev->irq.lock, irqflags); }
static bool radeon_msi_ok(struct radeon_device *rdev) @@ -168,15 +179,12 @@ static bool radeon_msi_ok(struct radeon_device *rdev)
int radeon_irq_kms_init(struct radeon_device *rdev) { - int i; int r = 0;
INIT_WORK(&rdev->hotplug_work, radeon_hotplug_work_func); INIT_WORK(&rdev->audio_work, r600_audio_update_hdmi);
- spin_lock_init(&rdev->irq.sw_lock); - for (i = 0; i < rdev->num_crtc; i++) - spin_lock_init(&rdev->irq.pflip_lock[i]); + spin_lock_init(&rdev->irq.lock); r = drm_vblank_init(rdev->ddev, rdev->num_crtc); if (r) { return r; @@ -217,25 +225,25 @@ void radeon_irq_kms_sw_irq_get(struct radeon_device *rdev, int ring) { unsigned long irqflags;
- spin_lock_irqsave(&rdev->irq.sw_lock, irqflags); + spin_lock_irqsave(&rdev->irq.lock, irqflags); if (rdev->ddev->irq_enabled && (++rdev->irq.sw_refcount[ring] == 1)) { rdev->irq.sw_int[ring] = true; radeon_irq_set(rdev); } - spin_unlock_irqrestore(&rdev->irq.sw_lock, irqflags); + spin_unlock_irqrestore(&rdev->irq.lock, irqflags); }
void radeon_irq_kms_sw_irq_put(struct radeon_device *rdev, int ring) { unsigned long irqflags;
- spin_lock_irqsave(&rdev->irq.sw_lock, irqflags); + spin_lock_irqsave(&rdev->irq.lock, irqflags); BUG_ON(rdev->ddev->irq_enabled && rdev->irq.sw_refcount[ring] <= 0); if (rdev->ddev->irq_enabled && (--rdev->irq.sw_refcount[ring] == 0)) { rdev->irq.sw_int[ring] = false; radeon_irq_set(rdev); } - spin_unlock_irqrestore(&rdev->irq.sw_lock, irqflags); + spin_unlock_irqrestore(&rdev->irq.lock, irqflags); }
void radeon_irq_kms_pflip_irq_get(struct radeon_device *rdev, int crtc) @@ -245,12 +253,12 @@ void radeon_irq_kms_pflip_irq_get(struct radeon_device *rdev, int crtc) if (crtc < 0 || crtc >= rdev->num_crtc) return;
- spin_lock_irqsave(&rdev->irq.pflip_lock[crtc], irqflags); + spin_lock_irqsave(&rdev->irq.lock, irqflags); if (rdev->ddev->irq_enabled && (++rdev->irq.pflip_refcount[crtc] == 1)) { rdev->irq.pflip[crtc] = true; radeon_irq_set(rdev); } - spin_unlock_irqrestore(&rdev->irq.pflip_lock[crtc], irqflags); + spin_unlock_irqrestore(&rdev->irq.lock, irqflags); }
void radeon_irq_kms_pflip_irq_put(struct radeon_device *rdev, int crtc) @@ -260,12 +268,76 @@ void radeon_irq_kms_pflip_irq_put(struct radeon_device *rdev, int crtc) if (crtc < 0 || crtc >= rdev->num_crtc) return;
- spin_lock_irqsave(&rdev->irq.pflip_lock[crtc], irqflags); + spin_lock_irqsave(&rdev->irq.lock, irqflags); BUG_ON(rdev->ddev->irq_enabled && rdev->irq.pflip_refcount[crtc] <= 0); if (rdev->ddev->irq_enabled && (--rdev->irq.pflip_refcount[crtc] == 0)) { rdev->irq.pflip[crtc] = false; radeon_irq_set(rdev); } - spin_unlock_irqrestore(&rdev->irq.pflip_lock[crtc], irqflags); + spin_unlock_irqrestore(&rdev->irq.lock, irqflags); }
+void radeon_irq_kms_enable_afmt(struct radeon_device *rdev, int block) +{ + unsigned long irqflags; + + spin_lock_irqsave(&rdev->irq.lock, irqflags); + rdev->irq.afmt[block] = true; + radeon_irq_set(rdev); + spin_unlock_irqrestore(&rdev->irq.lock, irqflags); + +} + +void radeon_irq_kms_disable_afmt(struct radeon_device *rdev, int block) +{ + unsigned long irqflags; + + spin_lock_irqsave(&rdev->irq.lock, irqflags); + rdev->irq.afmt[block] = false; + radeon_irq_set(rdev); + spin_unlock_irqrestore(&rdev->irq.lock, irqflags); +} + +void radeon_irq_kms_enable_hpd(struct radeon_device *rdev, unsigned crtcs) +{ + unsigned long irqflags; + int i; + + spin_lock_irqsave(&rdev->irq.lock, irqflags); + for (i = 0; i < RADEON_MAX_HPD_PINS; ++i) + rdev->irq.hpd[i] |= !!(crtcs & (1 << i)); + radeon_irq_set(rdev); + spin_unlock_irqrestore(&rdev->irq.lock, irqflags); +} + +void radeon_irq_kms_disable_hpd(struct radeon_device *rdev, unsigned crtcs) +{ + unsigned long irqflags; + int i; + + spin_lock_irqsave(&rdev->irq.lock, irqflags); + for (i = 0; i < RADEON_MAX_HPD_PINS; ++i) + rdev->irq.hpd[i] &= !(crtcs & (1 << i)); + radeon_irq_set(rdev); + spin_unlock_irqrestore(&rdev->irq.lock, irqflags); +} + +int radeon_irq_kms_wait_gui_idle(struct radeon_device *rdev) +{ + unsigned long irqflags; + int r; + + spin_lock_irqsave(&rdev->irq.lock, irqflags); + rdev->irq.gui_idle = true; + radeon_irq_set(rdev); + spin_unlock_irqrestore(&rdev->irq.lock, irqflags); + + r = wait_event_timeout(rdev->irq.idle_queue, radeon_gui_idle(rdev), + msecs_to_jiffies(RADEON_WAIT_IDLE_TIMEOUT)); + + spin_lock_irqsave(&rdev->irq.lock, irqflags); + rdev->irq.gui_idle = false; + radeon_irq_set(rdev); + spin_unlock_irqrestore(&rdev->irq.lock, irqflags); + return r; +} diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index f1016a5..abbb04d 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -382,29 +382,35 @@ u32 radeon_get_vblank_counter_kms(struct drm_device *dev, int crtc) int radeon_enable_vblank_kms(struct drm_device *dev, int crtc) { struct radeon_device *rdev = dev->dev_private; + unsigned long irqflags; + int r;
if (crtc < 0 || crtc >= rdev->num_crtc) { DRM_ERROR("Invalid crtc %d\n", crtc); return -EINVAL; }
+ spin_lock_irqsave(&rdev->irq.lock, irqflags); rdev->irq.crtc_vblank_int[crtc] = true; - - return radeon_irq_set(rdev); + r = radeon_irq_set(rdev); + spin_unlock_irqrestore(&rdev->irq.lock, irqflags); + return r; }
void radeon_disable_vblank_kms(struct drm_device *dev, int crtc) { struct radeon_device *rdev = dev->dev_private; + unsigned long irqflags;
if (crtc < 0 || crtc >= rdev->num_crtc) { DRM_ERROR("Invalid crtc %d\n", crtc); return; }
+ spin_lock_irqsave(&rdev->irq.lock, irqflags); rdev->irq.crtc_vblank_int[crtc] = false; - radeon_irq_set(rdev); + spin_unlock_irqrestore(&rdev->irq.lock, irqflags); }
int radeon_get_vblank_timestamp_kms(struct drm_device *dev, int crtc, diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c index d13b6ae..79642cd 100644 --- a/drivers/gpu/drm/radeon/radeon_pm.c +++ b/drivers/gpu/drm/radeon/radeon_pm.c @@ -34,7 +34,6 @@ #define RADEON_IDLE_LOOP_MS 100 #define RADEON_RECLOCK_DELAY_MS 200 #define RADEON_WAIT_VBLANK_TIMEOUT 200 -#define RADEON_WAIT_IDLE_TIMEOUT 200
static const char *radeon_pm_state_type_name[5] = { "Default", @@ -257,15 +256,8 @@ static void radeon_pm_set_clocks(struct radeon_device *rdev) /* gui idle int has issues on older chips it seems */ if (rdev->family >= CHIP_R600) { if (rdev->irq.installed) { - /* wait for GPU idle */ - rdev->pm.gui_idle = false; - rdev->irq.gui_idle = true; - radeon_irq_set(rdev); - wait_event_interruptible_timeout( - rdev->irq.idle_queue, rdev->pm.gui_idle, - msecs_to_jiffies(RADEON_WAIT_IDLE_TIMEOUT)); - rdev->irq.gui_idle = false; - radeon_irq_set(rdev); + /* wait for GPU to become idle */ + radeon_irq_kms_wait_gui_idle(rdev); } } else { struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c index 25f9eef..8387709 100644 --- a/drivers/gpu/drm/radeon/rs600.c +++ b/drivers/gpu/drm/radeon/rs600.c @@ -294,6 +294,7 @@ void rs600_hpd_init(struct radeon_device *rdev) { struct drm_device *dev = rdev->ddev; struct drm_connector *connector; + unsigned enable = 0;
list_for_each_entry(connector, &dev->mode_config.connector_list, head) { struct radeon_connector *radeon_connector = to_radeon_connector(connector); @@ -301,26 +302,25 @@ void rs600_hpd_init(struct radeon_device *rdev) case RADEON_HPD_1: WREG32(R_007D00_DC_HOT_PLUG_DETECT1_CONTROL, S_007D00_DC_HOT_PLUG_DETECT1_EN(1)); - rdev->irq.hpd[0] = true; break; case RADEON_HPD_2: WREG32(R_007D10_DC_HOT_PLUG_DETECT2_CONTROL, S_007D10_DC_HOT_PLUG_DETECT2_EN(1)); - rdev->irq.hpd[1] = true; break; default: break; } + enable |= 1 << radeon_connector->hpd.hpd; radeon_hpd_set_polarity(rdev, radeon_connector->hpd.hpd); } - if (rdev->irq.installed) - rs600_irq_set(rdev); + radeon_irq_kms_enable_hpd(rdev, enable); }
void rs600_hpd_fini(struct radeon_device *rdev) { struct drm_device *dev = rdev->ddev; struct drm_connector *connector; + unsigned disable = 0;
list_for_each_entry(connector, &dev->mode_config.connector_list, head) { struct radeon_connector *radeon_connector = to_radeon_connector(connector); @@ -328,17 +328,17 @@ void rs600_hpd_fini(struct radeon_device *rdev) case RADEON_HPD_1: WREG32(R_007D00_DC_HOT_PLUG_DETECT1_CONTROL, S_007D00_DC_HOT_PLUG_DETECT1_EN(0)); - rdev->irq.hpd[0] = false; break; case RADEON_HPD_2: WREG32(R_007D10_DC_HOT_PLUG_DETECT2_CONTROL, S_007D10_DC_HOT_PLUG_DETECT2_EN(0)); - rdev->irq.hpd[1] = false; break; default: break; } + disable |= 1 << radeon_connector->hpd.hpd; } + radeon_irq_kms_disable_hpd(rdev, disable); }
int rs600_asic_reset(struct radeon_device *rdev) @@ -686,7 +686,6 @@ int rs600_irq_process(struct radeon_device *rdev) /* GUI idle */ if (G_000040_GUI_IDLE(status)) { rdev->irq.gui_idle_acked = true; - rdev->pm.gui_idle = true; wake_up(&rdev->irq.idle_queue); } /* Vertical blank interrupts */ diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index 93da01c..9e691b5 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -3771,7 +3771,6 @@ restart_ih: break; case 233: /* GUI IDLE */ DRM_DEBUG("IH: GUI idle\n"); - rdev->pm.gui_idle = true; wake_up(&rdev->irq.idle_queue); break; default:
On Thu, May 31, 2012 at 4:16 PM, Christian König deathsimple@vodafone.de wrote:
From: Christian Koenig christian.koenig@amd.com

1. It is really dangerous to have more than one spinlock protecting the same information.

2. radeon_irq_set sometimes wasn't called with lock protection, so more than one CPU could tamper with the irq regs at the same time.

3. The pm.gui_idle variable assumed that the 3D engine didn't become idle between testing the register and setting the variable. So just remove it and test the register directly.

v2: Also handle the hpd irq code the same way.
couple of comments below for clarity, other than that:
Reviewed-by: Alex Deucher alexander.deucher@amd.com
[snip]
+void radeon_irq_kms_enable_hpd(struct radeon_device *rdev, unsigned crtcs)
These aren't crtcs. I'd replace crtcs with hpd_mask for clarity.
[snip]
+void radeon_irq_kms_disable_hpd(struct radeon_device *rdev, unsigned crtcs)
same here.
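For illustration, the suggested rename would look like this (hypothetical, not part of the patch as posted):

void radeon_irq_kms_enable_hpd(struct radeon_device *rdev, unsigned hpd_mask);
void radeon_irq_kms_disable_hpd(struct radeon_device *rdev, unsigned hpd_mask);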
[snip]
From: Christian Koenig christian.koenig@amd.com
So we can skip the locking. Also rename sw_int to ring_int, since that better matches its purpose.
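The idiom in generic form (hypothetical names, a sketch under the assumption that only the 0->1 and 1->0 transitions need to reprogram the hardware):

static atomic_t users = ATOMIC_INIT(0);
static DEFINE_SPINLOCK(hw_lock);

static void example_irq_get(void)
{
        unsigned long flags;

        if (atomic_inc_return(&users) == 1) {   /* first user: enable */
                spin_lock_irqsave(&hw_lock, flags);
                hw_update_irq_regs();           /* hypothetical helper */
                spin_unlock_irqrestore(&hw_lock, flags);
        }
}

static void example_irq_put(void)
{
        unsigned long flags;

        if (atomic_dec_and_test(&users)) {      /* last user: disable */
                spin_lock_irqsave(&hw_lock, flags);
                hw_update_irq_regs();
                spin_unlock_irqrestore(&hw_lock, flags);
        }
}

Nested get/put calls only touch the atomic counter and never take the shared lock, which is what lets the driver drop it on the fast path.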
Signed-off-by: Christian Koenig christian.koenig@amd.com --- drivers/gpu/drm/radeon/evergreen.c | 32 ++++++++--------- drivers/gpu/drm/radeon/r100.c | 10 +++--- drivers/gpu/drm/radeon/r600.c | 10 +++--- drivers/gpu/drm/radeon/radeon.h | 6 ++-- drivers/gpu/drm/radeon/radeon_irq_kms.c | 59 +++++++++++++++---------------- drivers/gpu/drm/radeon/rs600.c | 10 +++--- drivers/gpu/drm/radeon/si.c | 30 ++++++++-------- 7 files changed, 76 insertions(+), 81 deletions(-)
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index 48ec1a0..3d3520a 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -2607,20 +2607,20 @@ int evergreen_irq_set(struct radeon_device *rdev)
if (rdev->family >= CHIP_CAYMAN) { /* enable CP interrupts on all rings */ - if (rdev->irq.sw_int[RADEON_RING_TYPE_GFX_INDEX]) { + if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) { DRM_DEBUG("evergreen_irq_set: sw int gfx\n"); cp_int_cntl |= TIME_STAMP_INT_ENABLE; } - if (rdev->irq.sw_int[CAYMAN_RING_TYPE_CP1_INDEX]) { + if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) { DRM_DEBUG("evergreen_irq_set: sw int cp1\n"); cp_int_cntl1 |= TIME_STAMP_INT_ENABLE; } - if (rdev->irq.sw_int[CAYMAN_RING_TYPE_CP2_INDEX]) { + if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) { DRM_DEBUG("evergreen_irq_set: sw int cp2\n"); cp_int_cntl2 |= TIME_STAMP_INT_ENABLE; } } else { - if (rdev->irq.sw_int[RADEON_RING_TYPE_GFX_INDEX]) { + if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) { DRM_DEBUG("evergreen_irq_set: sw int gfx\n"); cp_int_cntl |= RB_INT_ENABLE; cp_int_cntl |= TIME_STAMP_INT_ENABLE; @@ -2628,32 +2628,32 @@ int evergreen_irq_set(struct radeon_device *rdev) }
if (rdev->irq.crtc_vblank_int[0] || - rdev->irq.pflip[0]) { + atomic_read(&rdev->irq.pflip[0])) { DRM_DEBUG("evergreen_irq_set: vblank 0\n"); crtc1 |= VBLANK_INT_MASK; } if (rdev->irq.crtc_vblank_int[1] || - rdev->irq.pflip[1]) { + atomic_read(&rdev->irq.pflip[1])) { DRM_DEBUG("evergreen_irq_set: vblank 1\n"); crtc2 |= VBLANK_INT_MASK; } if (rdev->irq.crtc_vblank_int[2] || - rdev->irq.pflip[2]) { + atomic_read(&rdev->irq.pflip[2])) { DRM_DEBUG("evergreen_irq_set: vblank 2\n"); crtc3 |= VBLANK_INT_MASK; } if (rdev->irq.crtc_vblank_int[3] || - rdev->irq.pflip[3]) { + atomic_read(&rdev->irq.pflip[3])) { DRM_DEBUG("evergreen_irq_set: vblank 3\n"); crtc4 |= VBLANK_INT_MASK; } if (rdev->irq.crtc_vblank_int[4] || - rdev->irq.pflip[4]) { + atomic_read(&rdev->irq.pflip[4])) { DRM_DEBUG("evergreen_irq_set: vblank 4\n"); crtc5 |= VBLANK_INT_MASK; } if (rdev->irq.crtc_vblank_int[5] || - rdev->irq.pflip[5]) { + atomic_read(&rdev->irq.pflip[5])) { DRM_DEBUG("evergreen_irq_set: vblank 5\n"); crtc6 |= VBLANK_INT_MASK; } @@ -2974,7 +2974,7 @@ restart_ih: rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } - if (rdev->irq.pflip[0]) + if (atomic_read(&rdev->irq.pflip[0])) radeon_crtc_handle_flip(rdev, 0); rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT; DRM_DEBUG("IH: D1 vblank\n"); @@ -3000,7 +3000,7 @@ restart_ih: rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } - if (rdev->irq.pflip[1]) + if (atomic_read(&rdev->irq.pflip[1])) radeon_crtc_handle_flip(rdev, 1); rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT; DRM_DEBUG("IH: D2 vblank\n"); @@ -3026,7 +3026,7 @@ restart_ih: rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } - if (rdev->irq.pflip[2]) + if (atomic_read(&rdev->irq.pflip[2])) radeon_crtc_handle_flip(rdev, 2); rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT; DRM_DEBUG("IH: D3 vblank\n"); @@ -3052,7 +3052,7 @@ restart_ih: rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } - if (rdev->irq.pflip[3]) + if (atomic_read(&rdev->irq.pflip[3])) radeon_crtc_handle_flip(rdev, 3); rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT; DRM_DEBUG("IH: D4 vblank\n"); @@ -3078,7 +3078,7 @@ restart_ih: rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } - if (rdev->irq.pflip[4]) + if (atomic_read(&rdev->irq.pflip[4])) radeon_crtc_handle_flip(rdev, 4); rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT; DRM_DEBUG("IH: D5 vblank\n"); @@ -3104,7 +3104,7 @@ restart_ih: rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } - if (rdev->irq.pflip[5]) + if (atomic_read(&rdev->irq.pflip[5])) radeon_crtc_handle_flip(rdev, 5); rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT; DRM_DEBUG("IH: D6 vblank\n"); diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index e8fe4ae..35825bf 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -689,18 +689,18 @@ int r100_irq_set(struct radeon_device *rdev) WREG32(R_000040_GEN_INT_CNTL, 0); return -EINVAL; } - if (rdev->irq.sw_int[RADEON_RING_TYPE_GFX_INDEX]) { + if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) { tmp |= RADEON_SW_INT_ENABLE; } if (rdev->irq.gui_idle) { tmp |= RADEON_GUI_IDLE_MASK; } if (rdev->irq.crtc_vblank_int[0] || - rdev->irq.pflip[0]) { + atomic_read(&rdev->irq.pflip[0])) { tmp |= RADEON_CRTC_VBLANK_MASK; } if (rdev->irq.crtc_vblank_int[1] || - rdev->irq.pflip[1]) { + 
atomic_read(&rdev->irq.pflip[1])) { tmp |= RADEON_CRTC2_VBLANK_MASK; } if (rdev->irq.hpd[0]) { @@ -775,7 +775,7 @@ int r100_irq_process(struct radeon_device *rdev) rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } - if (rdev->irq.pflip[0]) + if (atomic_read(&rdev->irq.pflip[0])) radeon_crtc_handle_flip(rdev, 0); } if (status & RADEON_CRTC2_VBLANK_STAT) { @@ -784,7 +784,7 @@ int r100_irq_process(struct radeon_device *rdev) rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } - if (rdev->irq.pflip[1]) + if (atomic_read(&rdev->irq.pflip[1])) radeon_crtc_handle_flip(rdev, 1); } if (status & RADEON_FP_DETECT_STAT) { diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index 72b5084..e9d8f4b 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -3088,18 +3088,18 @@ int r600_irq_set(struct radeon_device *rdev) hdmi1 = RREG32(HDMI1_AUDIO_PACKET_CONTROL) & ~HDMI0_AZ_FORMAT_WTRIG_MASK; }
- if (rdev->irq.sw_int[RADEON_RING_TYPE_GFX_INDEX]) { + if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) { DRM_DEBUG("r600_irq_set: sw int\n"); cp_int_cntl |= RB_INT_ENABLE; cp_int_cntl |= TIME_STAMP_INT_ENABLE; } if (rdev->irq.crtc_vblank_int[0] || - rdev->irq.pflip[0]) { + atomic_read(&rdev->irq.pflip[0])) { DRM_DEBUG("r600_irq_set: vblank 0\n"); mode_int |= D1MODE_VBLANK_INT_MASK; } if (rdev->irq.crtc_vblank_int[1] || - rdev->irq.pflip[1]) { + atomic_read(&rdev->irq.pflip[1])) { DRM_DEBUG("r600_irq_set: vblank 1\n"); mode_int |= D2MODE_VBLANK_INT_MASK; } @@ -3399,7 +3399,7 @@ restart_ih: rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } - if (rdev->irq.pflip[0]) + if (atomic_read(&rdev->irq.pflip[0])) radeon_crtc_handle_flip(rdev, 0); rdev->irq.stat_regs.r600.disp_int &= ~LB_D1_VBLANK_INTERRUPT; DRM_DEBUG("IH: D1 vblank\n"); @@ -3425,7 +3425,7 @@ restart_ih: rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } - if (rdev->irq.pflip[1]) + if (atomic_read(&rdev->irq.pflip[1])) radeon_crtc_handle_flip(rdev, 1); rdev->irq.stat_regs.r600.disp_int &= ~LB_D2_VBLANK_INTERRUPT; DRM_DEBUG("IH: D2 vblank\n"); diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 8a2a1f4..19e7554 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -612,11 +612,9 @@ union radeon_irq_stat_regs { struct radeon_irq { bool installed; spinlock_t lock; - bool sw_int[RADEON_NUM_RINGS]; - int sw_refcount[RADEON_NUM_RINGS]; + atomic_t ring_int[RADEON_NUM_RINGS]; bool crtc_vblank_int[RADEON_MAX_CRTCS]; - bool pflip[RADEON_MAX_CRTCS]; - int pflip_refcount[RADEON_MAX_CRTCS]; + atomic_t pflip[RADEON_MAX_CRTCS]; wait_queue_head_t vblank_queue; bool hpd[RADEON_MAX_HPD_PINS]; bool gui_idle; diff --git a/drivers/gpu/drm/radeon/radeon_irq_kms.c b/drivers/gpu/drm/radeon/radeon_irq_kms.c index 73cd0fd..52f85ba 100644 --- a/drivers/gpu/drm/radeon/radeon_irq_kms.c +++ b/drivers/gpu/drm/radeon/radeon_irq_kms.c @@ -70,13 +70,13 @@ void radeon_driver_irq_preinstall_kms(struct drm_device *dev) spin_lock_irqsave(&rdev->irq.lock, irqflags); /* Disable *all* interrupts */ for (i = 0; i < RADEON_NUM_RINGS; i++) - rdev->irq.sw_int[i] = false; + atomic_set(&rdev->irq.ring_int[i], 0); rdev->irq.gui_idle = false; for (i = 0; i < RADEON_MAX_HPD_PINS; i++) rdev->irq.hpd[i] = false; for (i = 0; i < RADEON_MAX_CRTCS; i++) { rdev->irq.crtc_vblank_int[i] = false; - rdev->irq.pflip[i] = false; + atomic_set(&rdev->irq.pflip[i], 0); rdev->irq.afmt[i] = false; } radeon_irq_set(rdev); @@ -87,16 +87,7 @@ void radeon_driver_irq_preinstall_kms(struct drm_device *dev)
int radeon_driver_irq_postinstall_kms(struct drm_device *dev) { - struct radeon_device *rdev = dev->dev_private; - unsigned long irqflags; - unsigned i; - dev->max_vblank_count = 0x001fffff; - spin_lock_irqsave(&rdev->irq.lock, irqflags); - for (i = 0; i < RADEON_NUM_RINGS; i++) - rdev->irq.sw_int[i] = true; - radeon_irq_set(rdev); - spin_unlock_irqrestore(&rdev->irq.lock, irqflags); return 0; }
@@ -112,13 +103,13 @@ void radeon_driver_irq_uninstall_kms(struct drm_device *dev) spin_lock_irqsave(&rdev->irq.lock, irqflags); /* Disable *all* interrupts */ for (i = 0; i < RADEON_NUM_RINGS; i++) - rdev->irq.sw_int[i] = false; + atomic_set(&rdev->irq.ring_int[i], 0); rdev->irq.gui_idle = false; for (i = 0; i < RADEON_MAX_HPD_PINS; i++) rdev->irq.hpd[i] = false; for (i = 0; i < RADEON_MAX_CRTCS; i++) { rdev->irq.crtc_vblank_int[i] = false; - rdev->irq.pflip[i] = false; + atomic_set(&rdev->irq.pflip[i], 0); rdev->irq.afmt[i] = false; } radeon_irq_set(rdev); @@ -225,25 +216,28 @@ void radeon_irq_kms_sw_irq_get(struct radeon_device *rdev, int ring) { unsigned long irqflags;
- spin_lock_irqsave(&rdev->irq.lock, irqflags); - if (rdev->ddev->irq_enabled && (++rdev->irq.sw_refcount[ring] == 1)) { - rdev->irq.sw_int[ring] = true; + if (!rdev->ddev->irq_enabled) + return; + + if (atomic_inc_return(&rdev->irq.ring_int[ring]) == 1) { + spin_lock_irqsave(&rdev->irq.lock, irqflags); radeon_irq_set(rdev); + spin_unlock_irqrestore(&rdev->irq.lock, irqflags); } - spin_unlock_irqrestore(&rdev->irq.lock, irqflags); }
void radeon_irq_kms_sw_irq_put(struct radeon_device *rdev, int ring) { unsigned long irqflags;
- spin_lock_irqsave(&rdev->irq.lock, irqflags); - BUG_ON(rdev->ddev->irq_enabled && rdev->irq.sw_refcount[ring] <= 0); - if (rdev->ddev->irq_enabled && (--rdev->irq.sw_refcount[ring] == 0)) { - rdev->irq.sw_int[ring] = false; + if (!rdev->ddev->irq_enabled) + return; + + if (atomic_dec_and_test(&rdev->irq.ring_int[ring])) { + spin_lock_irqsave(&rdev->irq.lock, irqflags); radeon_irq_set(rdev); + spin_unlock_irqrestore(&rdev->irq.lock, irqflags); } - spin_unlock_irqrestore(&rdev->irq.lock, irqflags); }
void radeon_irq_kms_pflip_irq_get(struct radeon_device *rdev, int crtc) @@ -253,12 +247,14 @@ void radeon_irq_kms_pflip_irq_get(struct radeon_device *rdev, int crtc) if (crtc < 0 || crtc >= rdev->num_crtc) return;
- spin_lock_irqsave(&rdev->irq.lock, irqflags); - if (rdev->ddev->irq_enabled && (++rdev->irq.pflip_refcount[crtc] == 1)) { - rdev->irq.pflip[crtc] = true; + if (!rdev->ddev->irq_enabled) + return; + + if (atomic_inc_return(&rdev->irq.pflip[crtc]) == 1) { + spin_lock_irqsave(&rdev->irq.lock, irqflags); radeon_irq_set(rdev); + spin_unlock_irqrestore(&rdev->irq.lock, irqflags); } - spin_unlock_irqrestore(&rdev->irq.lock, irqflags); }
void radeon_irq_kms_pflip_irq_put(struct radeon_device *rdev, int crtc) @@ -268,13 +264,14 @@ void radeon_irq_kms_pflip_irq_put(struct radeon_device *rdev, int crtc) if (crtc < 0 || crtc >= rdev->num_crtc) return;
- spin_lock_irqsave(&rdev->irq.lock, irqflags); - BUG_ON(rdev->ddev->irq_enabled && rdev->irq.pflip_refcount[crtc] <= 0); - if (rdev->ddev->irq_enabled && (--rdev->irq.pflip_refcount[crtc] == 0)) { - rdev->irq.pflip[crtc] = false; + if (!rdev->ddev->irq_enabled) + return; + + if (atomic_dec_and_test(&rdev->irq.pflip[crtc])) { + spin_lock_irqsave(&rdev->irq.lock, irqflags); radeon_irq_set(rdev); + spin_unlock_irqrestore(&rdev->irq.lock, irqflags); } - spin_unlock_irqrestore(&rdev->irq.lock, irqflags); }
void radeon_irq_kms_enable_afmt(struct radeon_device *rdev, int block) diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c index 8387709..e6c2e96 100644 --- a/drivers/gpu/drm/radeon/rs600.c +++ b/drivers/gpu/drm/radeon/rs600.c @@ -564,18 +564,18 @@ int rs600_irq_set(struct radeon_device *rdev) WREG32(R_000040_GEN_INT_CNTL, 0); return -EINVAL; } - if (rdev->irq.sw_int[RADEON_RING_TYPE_GFX_INDEX]) { + if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) { tmp |= S_000040_SW_INT_EN(1); } if (rdev->irq.gui_idle) { tmp |= S_000040_GUI_IDLE(1); } if (rdev->irq.crtc_vblank_int[0] || - rdev->irq.pflip[0]) { + atomic_read(&rdev->irq.pflip[0])) { mode_int |= S_006540_D1MODE_VBLANK_INT_MASK(1); } if (rdev->irq.crtc_vblank_int[1] || - rdev->irq.pflip[1]) { + atomic_read(&rdev->irq.pflip[1])) { mode_int |= S_006540_D2MODE_VBLANK_INT_MASK(1); } if (rdev->irq.hpd[0]) { @@ -695,7 +695,7 @@ int rs600_irq_process(struct radeon_device *rdev) rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } - if (rdev->irq.pflip[0]) + if (atomic_read(&rdev->irq.pflip[0])) radeon_crtc_handle_flip(rdev, 0); } if (G_007EDC_LB_D2_VBLANK_INTERRUPT(rdev->irq.stat_regs.r500.disp_int)) { @@ -704,7 +704,7 @@ int rs600_irq_process(struct radeon_device *rdev) rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } - if (rdev->irq.pflip[1]) + if (atomic_read(&rdev->irq.pflip[1])) radeon_crtc_handle_flip(rdev, 1); } if (G_007EDC_DC_HOT_PLUG_DETECT1_INTERRUPT(rdev->irq.stat_regs.r500.disp_int)) { diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index 9e691b5..407ca6e 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -3245,45 +3245,45 @@ int si_irq_set(struct radeon_device *rdev) hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
/* enable CP interrupts on all rings */ - if (rdev->irq.sw_int[RADEON_RING_TYPE_GFX_INDEX]) { + if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) { DRM_DEBUG("si_irq_set: sw int gfx\n"); cp_int_cntl |= TIME_STAMP_INT_ENABLE; } - if (rdev->irq.sw_int[CAYMAN_RING_TYPE_CP1_INDEX]) { + if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) { DRM_DEBUG("si_irq_set: sw int cp1\n"); cp_int_cntl1 |= TIME_STAMP_INT_ENABLE; } - if (rdev->irq.sw_int[CAYMAN_RING_TYPE_CP2_INDEX]) { + if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) { DRM_DEBUG("si_irq_set: sw int cp2\n"); cp_int_cntl2 |= TIME_STAMP_INT_ENABLE; } if (rdev->irq.crtc_vblank_int[0] || - rdev->irq.pflip[0]) { + atomic_read(&rdev->irq.pflip[0])) { DRM_DEBUG("si_irq_set: vblank 0\n"); crtc1 |= VBLANK_INT_MASK; } if (rdev->irq.crtc_vblank_int[1] || - rdev->irq.pflip[1]) { + atomic_read(&rdev->irq.pflip[1])) { DRM_DEBUG("si_irq_set: vblank 1\n"); crtc2 |= VBLANK_INT_MASK; } if (rdev->irq.crtc_vblank_int[2] || - rdev->irq.pflip[2]) { + atomic_read(&rdev->irq.pflip[2])) { DRM_DEBUG("si_irq_set: vblank 2\n"); crtc3 |= VBLANK_INT_MASK; } if (rdev->irq.crtc_vblank_int[3] || - rdev->irq.pflip[3]) { + atomic_read(&rdev->irq.pflip[3])) { DRM_DEBUG("si_irq_set: vblank 3\n"); crtc4 |= VBLANK_INT_MASK; } if (rdev->irq.crtc_vblank_int[4] || - rdev->irq.pflip[4]) { + atomic_read(&rdev->irq.pflip[4])) { DRM_DEBUG("si_irq_set: vblank 4\n"); crtc5 |= VBLANK_INT_MASK; } if (rdev->irq.crtc_vblank_int[5] || - rdev->irq.pflip[5]) { + atomic_read(&rdev->irq.pflip[5])) { DRM_DEBUG("si_irq_set: vblank 5\n"); crtc6 |= VBLANK_INT_MASK; } @@ -3550,7 +3550,7 @@ restart_ih: rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } - if (rdev->irq.pflip[0]) + if (atomic_read(&rdev->irq.pflip[0])) radeon_crtc_handle_flip(rdev, 0); rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT; DRM_DEBUG("IH: D1 vblank\n"); @@ -3576,7 +3576,7 @@ restart_ih: rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } - if (rdev->irq.pflip[1]) + if (atomic_read(&rdev->irq.pflip[1])) radeon_crtc_handle_flip(rdev, 1); rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT; DRM_DEBUG("IH: D2 vblank\n"); @@ -3602,7 +3602,7 @@ restart_ih: rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } - if (rdev->irq.pflip[2]) + if (atomic_read(&rdev->irq.pflip[2])) radeon_crtc_handle_flip(rdev, 2); rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT; DRM_DEBUG("IH: D3 vblank\n"); @@ -3628,7 +3628,7 @@ restart_ih: rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } - if (rdev->irq.pflip[3]) + if (atomic_read(&rdev->irq.pflip[3])) radeon_crtc_handle_flip(rdev, 3); rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT; DRM_DEBUG("IH: D4 vblank\n"); @@ -3654,7 +3654,7 @@ restart_ih: rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } - if (rdev->irq.pflip[4]) + if (atomic_read(&rdev->irq.pflip[4])) radeon_crtc_handle_flip(rdev, 4); rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT; DRM_DEBUG("IH: D5 vblank\n"); @@ -3680,7 +3680,7 @@ restart_ih: rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } - if (rdev->irq.pflip[5]) + if (atomic_read(&rdev->irq.pflip[5])) radeon_crtc_handle_flip(rdev, 5); rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT; DRM_DEBUG("IH: D6 vblank\n");
On Thu, 2012-05-31 at 22:16 +0200, Christian König wrote:
So we can skip the looking.
'locking'
diff --git a/drivers/gpu/drm/radeon/radeon_irq_kms.c b/drivers/gpu/drm/radeon/radeon_irq_kms.c index 73cd0fd..52f85ba 100644 --- a/drivers/gpu/drm/radeon/radeon_irq_kms.c +++ b/drivers/gpu/drm/radeon/radeon_irq_kms.c @@ -87,16 +87,7 @@ void radeon_driver_irq_preinstall_kms(struct drm_device *dev)
 int radeon_driver_irq_postinstall_kms(struct drm_device *dev)
 {
-	struct radeon_device *rdev = dev->dev_private;
-	unsigned long irqflags;
-	unsigned i;
-
 	dev->max_vblank_count = 0x001fffff;
-	spin_lock_irqsave(&rdev->irq.lock, irqflags);
-	for (i = 0; i < RADEON_NUM_RINGS; i++)
-		rdev->irq.sw_int[i] = true;
-	radeon_irq_set(rdev);
-	spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
 	return 0;
 }
Why does this function no longer need to enable SW interrupts? If it really doesn't, that might be material for a separate patch.
@@ -225,25 +216,28 @@ void radeon_irq_kms_sw_irq_get(struct radeon_device *rdev, int ring)
 {
 	unsigned long irqflags;

-	spin_lock_irqsave(&rdev->irq.lock, irqflags);
-	if (rdev->ddev->irq_enabled && (++rdev->irq.sw_refcount[ring] == 1)) {
-		rdev->irq.sw_int[ring] = true;
+	if (!rdev->ddev->irq_enabled)
+		return;
+
+	if (atomic_inc_return(&rdev->irq.ring_int[ring]) == 1) {
+		spin_lock_irqsave(&rdev->irq.lock, irqflags);
 		radeon_irq_set(rdev);
+		spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
 	}
-	spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
 }

 void radeon_irq_kms_sw_irq_put(struct radeon_device *rdev, int ring)
 {
 	unsigned long irqflags;

-	spin_lock_irqsave(&rdev->irq.lock, irqflags);
-	BUG_ON(rdev->ddev->irq_enabled && rdev->irq.sw_refcount[ring] <= 0);
-	if (rdev->ddev->irq_enabled && (--rdev->irq.sw_refcount[ring] == 0)) {
-		rdev->irq.sw_int[ring] = false;
+	if (!rdev->ddev->irq_enabled)
+		return;
+
+	if (atomic_dec_and_test(&rdev->irq.ring_int[ring])) {
+		spin_lock_irqsave(&rdev->irq.lock, irqflags);
 		radeon_irq_set(rdev);
+		spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
 	}
-	spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
 }
void radeon_irq_kms_pflip_irq_get(struct radeon_device *rdev, int crtc)
I think this might introduce a race condition:
Thread 0                        Thread 1
--------                        --------
atomic_inc_return() returns 1
                                spin_lock_irqsave()
                                atomic_dec_and_test()
                                radeon_irq_set()
=> the interrupt won't be enabled.
Maybe this explains why you couldn't remove the spinlock in patch 6?
On Fri, 2012-06-01 at 08:19 +0200, Michel Dänzer wrote:
I think this might introduce a race condition:
Thread 0                        Thread 1
--------                        --------
atomic_inc_return() returns 1
                                spin_lock_irqsave()
                                atomic_dec_and_test()
                                radeon_irq_set()
=> the interrupt won't be enabled.
Hrmm, I messed up the formatting there, let me try one more time:
Thread 0                        Thread 1
--------                        --------
atomic_inc_return() returns 1
                                spin_lock_irqsave()
                                atomic_dec_and_test()
                                radeon_irq_set()
On 01.06.2012 08:30, Michel Dänzer wrote:
On Fri, 2012-06-01 at 08:19 +0200, Michel Dänzer wrote:
I think this might introduce a race condition:
Thread 0                        Thread 1
--------                        --------
atomic_inc_return() returns 1
                                spin_lock_irqsave()
                                atomic_dec_and_test()
                                radeon_irq_set()
=> the interrupt won't be enabled.
Hrmm, I messed up the formatting there, let me try one more time:
Thread 0                        Thread 1
--------                        --------
atomic_inc_return() returns 1
                                spin_lock_irqsave()
                                atomic_dec_and_test()
                                radeon_irq_set()
Nope, that isn't a problem, because what you really get in your example is:
Thread 0                        Thread 1
--------                        --------
atomic_inc_return() returns 1
                                spin_lock_irqsave()
                                atomic_dec_and_test()
                                radeon_irq_set()
                                spin_unlock_irqrestore()
spin_lock_irqsave()
radeon_irq_set()
spin_unlock_irqrestore()
So testing the atomic counters just determines whether we need to update the irq registers at all; since every significant change triggers an update, the irq registers always end up set to the last known state. We might call radeon_irq_set more often than necessary, but that won't hurt us and is really unlikely anyway.
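To make that concrete, here is a minimal userspace model of the pattern (an illustration of the idea, not driver code): C11 atomics stand in for the kernel's atomic_t, a pthread mutex for the irq spinlock, and a plain bool for the programmed interrupt state. The key property is that commit_state() re-reads the counter under the lock, so whichever thread commits last leaves the "hardware" matching the final refcount.

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int refcount;                 /* models rdev->irq.ring_int[ring] */
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER; /* models rdev->irq.lock */
static bool hw_enabled;                     /* models the programmed irq enable bit */

static void commit_state(void)              /* models radeon_irq_set() */
{
        /* always derived from the *current* counter value, never from
         * the transition that triggered the call */
        hw_enabled = atomic_load(&refcount) > 0;
}

static void irq_get(void)                   /* models radeon_irq_kms_sw_irq_get() */
{
        if (atomic_fetch_add(&refcount, 1) + 1 == 1) {  /* 0 -> 1 transition */
                pthread_mutex_lock(&lock);
                commit_state();
                pthread_mutex_unlock(&lock);
        }
}

static void irq_put(void)                   /* models radeon_irq_kms_sw_irq_put() */
{
        if (atomic_fetch_sub(&refcount, 1) - 1 == 0) {  /* 1 -> 0 transition */
                pthread_mutex_lock(&lock);
                commit_state();
                pthread_mutex_unlock(&lock);
        }
}

int main(void)
{
        irq_get();      /* commits "enabled" */
        irq_put();      /* commits "disabled"; in the interleaving above, the
                         * late commit reads refcount == 0 and stays off */
        printf("refcount=%d hw_enabled=%d\n", atomic_load(&refcount), hw_enabled);
        return 0;
}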
Also, I have found the real reason why using the atomic for preventing IH recursion didn't work as expected - it was just a stupid typo in my patch. But thanks for the comment anyway, it got me looking in the right direction for the bug.
Cheers, Christian.
On Fri, 2012-06-01 at 12:44 +0200, Christian König wrote:
On 01.06.2012 08:30, Michel Dänzer wrote:
On Fri, 2012-06-01 at 08:19 +0200, Michel Dänzer wrote:
I think this might introduce a race condition:
Thread 0                        Thread 1
--------                        --------
atomic_inc_return() returns 1
                                spin_lock_irqsave()
                                atomic_dec_and_test()
                                radeon_irq_set()
=> the interrupt won't be enabled.
Hrmm, I messed up the formatting there, let me try one more time:
Thread 0                        Thread 1
--------                        --------
atomic_inc_return() returns 1
                                spin_lock_irqsave()
                                atomic_dec_and_test()
                                radeon_irq_set()
Nope, that isn't a problem, because what you really get in your example is:
Thread 0                        Thread 1
--------                        --------
atomic_inc_return() returns 1
                                spin_lock_irqsave()
                                atomic_dec_and_test()
                                radeon_irq_set()
                                spin_unlock_irqrestore()
spin_lock_irqsave()
radeon_irq_set()
spin_unlock_irqrestore()
So testing the atomic counters just determines whether we need to update the irq registers at all; since every significant change triggers an update, the irq registers always end up set to the last known state. We might call radeon_irq_set more often than necessary, but that won't hurt us and is really unlikely anyway.
Yeah, I also realized in the meantime that the race can't happen. I blame it on lack of caffeine. :)
Also, I have found the real reason why using the atomic for preventing IH recursion didn't work as expected - it was just a stupid typo in my patch. But thanks for the comment anyway, it got me looking in the right direction for the bug.
Glad to hear that!
Remove the cs_mutex, replacing it with a vm_mutex where locking is still needed.
Signed-off-by: Christian König deathsimple@vodafone.de --- drivers/gpu/drm/radeon/radeon.h | 44 +------------------------------- drivers/gpu/drm/radeon/radeon_cs.c | 7 ++--- drivers/gpu/drm/radeon/radeon_device.c | 2 +- drivers/gpu/drm/radeon/radeon_gart.c | 16 ++++++------ drivers/gpu/drm/radeon/radeon_gem.c | 2 -- 5 files changed, 12 insertions(+), 59 deletions(-)
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 19e7554..b24877f 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -159,48 +159,6 @@ static inline int radeon_atrm_get_bios_chunk(uint8_t *bios, int offset, int len) #endif bool radeon_get_bios(struct radeon_device *rdev);
- -/* - * Mutex which allows recursive locking from the same process. - */ -struct radeon_mutex { - struct mutex mutex; - struct task_struct *owner; - int level; -}; - -static inline void radeon_mutex_init(struct radeon_mutex *mutex) -{ - mutex_init(&mutex->mutex); - mutex->owner = NULL; - mutex->level = 0; -} - -static inline void radeon_mutex_lock(struct radeon_mutex *mutex) -{ - if (mutex_trylock(&mutex->mutex)) { - /* The mutex was unlocked before, so it's ours now */ - mutex->owner = current; - } else if (mutex->owner != current) { - /* Another process locked the mutex, take it */ - mutex_lock(&mutex->mutex); - mutex->owner = current; - } - /* Otherwise the mutex was already locked by this process */ - - mutex->level++; -} - -static inline void radeon_mutex_unlock(struct radeon_mutex *mutex) -{ - if (--mutex->level > 0) - return; - - mutex->owner = NULL; - mutex_unlock(&mutex->mutex); -} - - /* * Dummy page */ @@ -1529,7 +1487,6 @@ struct radeon_device { struct radeon_gem gem; struct radeon_pm pm; uint32_t bios_scratch[RADEON_BIOS_NUM_SCRATCH]; - struct radeon_mutex cs_mutex; struct radeon_wb wb; struct radeon_dummy_page dummy_page; bool shutdown; @@ -1563,6 +1520,7 @@ struct radeon_device { struct radeon_debugfs debugfs[RADEON_DEBUGFS_MAX_COMPONENTS]; unsigned debugfs_count; /* virtual memory */ + struct mutex vm_mutex; struct radeon_vm_manager vm_manager; };
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index d295821..d1ead9c 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -438,6 +438,7 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev, }
mutex_lock(&vm->mutex); + mutex_lock(&rdev->vm_mutex); r = radeon_vm_bind(rdev, vm); if (r) { goto out; @@ -474,6 +475,7 @@ out: } vm->fence = radeon_fence_ref(parser->ib.fence); } + mutex_unlock(&rdev->vm_mutex); mutex_unlock(&fpriv->vm.mutex); return r; } @@ -494,9 +496,7 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) struct radeon_cs_parser parser; int r;
- radeon_mutex_lock(&rdev->cs_mutex); if (!rdev->accel_working) { - radeon_mutex_unlock(&rdev->cs_mutex); return -EBUSY; } /* initialize parser */ @@ -510,7 +510,6 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) DRM_ERROR("Failed to initialize parser !\n"); radeon_cs_parser_fini(&parser, r); r = radeon_cs_handle_lockup(rdev, r); - radeon_mutex_unlock(&rdev->cs_mutex); return r; } r = radeon_cs_parser_relocs(&parser); @@ -519,7 +518,6 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) DRM_ERROR("Failed to parse relocation %d!\n", r); radeon_cs_parser_fini(&parser, r); r = radeon_cs_handle_lockup(rdev, r); - radeon_mutex_unlock(&rdev->cs_mutex); return r; } r = radeon_cs_ib_chunk(rdev, &parser); @@ -533,7 +531,6 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) out: radeon_cs_parser_fini(&parser, r); r = radeon_cs_handle_lockup(rdev, r); - radeon_mutex_unlock(&rdev->cs_mutex); return r; }
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 7667184..57143dd 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -728,7 +728,6 @@ int radeon_device_init(struct radeon_device *rdev,
/* mutex initialization are all done here so we * can recall function without having locking issues */ - radeon_mutex_init(&rdev->cs_mutex); mutex_init(&rdev->ring_lock); mutex_init(&rdev->dc_hw_i2c_mutex); if (rdev->family >= CHIP_R600) @@ -742,6 +741,7 @@ int radeon_device_init(struct radeon_device *rdev, if (r) return r; /* initialize vm here */ + mutex_init(&rdev->vm_mutex); rdev->vm_manager.use_bitmap = 1; rdev->vm_manager.max_pfn = 1 << 20; INIT_LIST_HEAD(&rdev->vm_manager.lru_vm); diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c index 4fa5402..7226d2b 100644 --- a/drivers/gpu/drm/radeon/radeon_gart.c +++ b/drivers/gpu/drm/radeon/radeon_gart.c @@ -356,13 +356,13 @@ int radeon_vm_manager_suspend(struct radeon_device *rdev) { struct radeon_vm *vm, *tmp;
- radeon_mutex_lock(&rdev->cs_mutex); + mutex_lock(&rdev->vm_mutex); /* unbind all active vm */ list_for_each_entry_safe(vm, tmp, &rdev->vm_manager.lru_vm, list) { radeon_vm_unbind_locked(rdev, vm); } rdev->vm_manager.funcs->fini(rdev); - radeon_mutex_unlock(&rdev->cs_mutex); + mutex_unlock(&rdev->vm_mutex); return radeon_sa_bo_manager_suspend(rdev, &rdev->vm_manager.sa_manager); }
@@ -478,9 +478,9 @@ int radeon_vm_bo_add(struct radeon_device *rdev, if (last_pfn > vm->last_pfn) { /* grow va space 32M by 32M */ unsigned align = ((32 << 20) >> 12) - 1; - radeon_mutex_lock(&rdev->cs_mutex); + mutex_lock(&rdev->vm_mutex); radeon_vm_unbind_locked(rdev, vm); - radeon_mutex_unlock(&rdev->cs_mutex); + mutex_unlock(&rdev->vm_mutex); vm->last_pfn = (last_pfn + align) & ~align; } head = &vm->va; @@ -596,9 +596,9 @@ int radeon_vm_bo_rmv(struct radeon_device *rdev, return 0;
mutex_lock(&vm->mutex); - radeon_mutex_lock(&rdev->cs_mutex); + mutex_lock(&rdev->vm_mutex); radeon_vm_bo_update_pte(rdev, vm, bo, NULL); - radeon_mutex_unlock(&rdev->cs_mutex); + mutex_unlock(&rdev->vm_mutex); list_del(&bo_va->vm_list); mutex_unlock(&vm->mutex); list_del(&bo_va->bo_list); @@ -643,9 +643,9 @@ void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm)
mutex_lock(&vm->mutex);
- radeon_mutex_lock(&rdev->cs_mutex); + mutex_lock(&rdev->vm_mutex); radeon_vm_unbind_locked(rdev, vm); - radeon_mutex_unlock(&rdev->cs_mutex); + mutex_unlock(&rdev->vm_mutex);
/* remove all bo */ r = radeon_bo_reserve(rdev->ring_tmp_bo.bo, false); diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index c0130b0..527d96e 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -159,11 +159,9 @@ void radeon_gem_object_close(struct drm_gem_object *obj, static int radeon_gem_handle_lockup(struct radeon_device *rdev, int r) { if (r == -EDEADLK) { - radeon_mutex_lock(&rdev->cs_mutex); r = radeon_gpu_reset(rdev); if (!r) r = -EAGAIN; - radeon_mutex_unlock(&rdev->cs_mutex); } return r; }
From: Christian Koenig christian.koenig@amd.com
The shader preemption on cayman doesn't work correctly with multiple rings. So, to still be able to make use of the compute rings, we use a semaphore to make sure that only one IB can execute at any given time.
This isn't as effective as shader preemption, but also isn't as bad as putting everything on the GFX ring.
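To spell out the scheme before the patch itself: the semaphore is signalled once up front (on the GFX ring in the hunks below), and every IB then waits on it before executing and signals it again afterwards, so at most one IB across all rings sits between wait and signal at any time. Here is a small userspace model, with a POSIX semaphore standing in for the GPU semaphore packets and one thread per CP ring; this illustrates the idea only, it is not driver code.

#include <pthread.h>
#include <semaphore.h>
#include <stdio.h>

static sem_t ring_lock;                 /* models rdev->cayman_ring_lock */

static void *ring_thread(void *arg)     /* each thread models one CP ring */
{
        const char *ring = arg;

        for (int i = 0; i < 3; i++) {
                sem_wait(&ring_lock);   /* radeon_semaphore_emit_wait() before the IB */
                printf("%s: IB %d executing exclusively\n", ring, i);
                sem_post(&ring_lock);   /* radeon_semaphore_emit_signal() after the IB */
        }
        return NULL;
}

int main(void)
{
        pthread_t gfx, cp1, cp2;

        sem_init(&ring_lock, 0, 0);
        sem_post(&ring_lock);           /* the one-time signal emitted on the GFX ring */

        pthread_create(&gfx, NULL, ring_thread, "gfx");
        pthread_create(&cp1, NULL, ring_thread, "cp1");
        pthread_create(&cp2, NULL, ring_thread, "cp2");

        pthread_join(gfx, NULL);
        pthread_join(cp1, NULL);
        pthread_join(cp2, NULL);

        sem_destroy(&ring_lock);
        return 0;
}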
Signed-off-by: Christian Koenig christian.koenig@amd.com --- drivers/gpu/drm/radeon/ni.c | 142 ++++++++++++++++++++++++++++++++++-- drivers/gpu/drm/radeon/radeon.h | 2 + drivers/gpu/drm/radeon/radeon_cs.c | 2 +- 3 files changed, 139 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index 9d9f5ac..6a3e8a8 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -1125,13 +1125,75 @@ void cayman_fence_ring_emit(struct radeon_device *rdev, radeon_ring_write(ring, 0); }
+/* The shader preemption on cayman doesn't work + * correctly with multiple rings. So to be able to + * still make use of the compute rings we use a + * semaphore to make sure that only one IB can execute + * at the same time + */ +static void cayman_cp_ring_create_workaround(struct radeon_device *rdev) +{ + struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; + int r; + + r = radeon_semaphore_create(rdev, &rdev->cayman_ring_lock); + if (r) { + dev_err(rdev->dev, "Can't allocate " + "cayman_ring_lock (%d)!\n", r); + return; + } + + r = radeon_ring_alloc(rdev, ring, 8); + if (r) { + dev_err(rdev->dev, "Can't initialize " + "cayman_ring_lock (%d)!\n", r); + radeon_semaphore_free(rdev, &rdev->cayman_ring_lock, NULL); + return; + } + + radeon_semaphore_emit_signal(rdev, RADEON_RING_TYPE_GFX_INDEX, + rdev->cayman_ring_lock); + + radeon_ring_commit(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]); +} + +static void cayman_cp_ring_cleanup_workaround(struct radeon_device *rdev) +{ + struct radeon_fence *fence; + int r; + + r = radeon_fence_emit(rdev, &fence, RADEON_RING_TYPE_GFX_INDEX); + if (r) { + dev_err(rdev->dev, "Can't cleanup " + "cayman_ring_lock (%d)!\n", r); + return; + } + + radeon_semaphore_free(rdev, &rdev->cayman_ring_lock, fence); + radeon_fence_unref(&fence); +} + void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) { struct radeon_ring *ring = &rdev->ring[ib->ring];
+ if (ib->ring != RADEON_RING_TYPE_GFX_INDEX) { + if (rdev->cayman_ring_lock == NULL) { + cayman_cp_ring_create_workaround(rdev); + } + } else { + if (rdev->cayman_ring_lock != NULL && + !radeon_fence_count_emitted(rdev, CAYMAN_RING_TYPE_CP1_INDEX) && + !radeon_fence_count_emitted(rdev, CAYMAN_RING_TYPE_CP2_INDEX)) { + cayman_cp_ring_cleanup_workaround(rdev); + } + } + /* set to DX10/11 mode */ radeon_ring_write(ring, PACKET3(PACKET3_MODE_CONTROL, 0)); radeon_ring_write(ring, 1); + if (rdev->cayman_ring_lock) + radeon_semaphore_emit_wait(rdev, ib->ring, rdev->cayman_ring_lock); radeon_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); radeon_ring_write(ring, #ifdef __BIG_ENDIAN @@ -1140,6 +1202,8 @@ void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) (ib->gpu_addr & 0xFFFFFFFC)); radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFF); radeon_ring_write(ring, ib->length_dw | (ib->vm_id << 24)); + if (rdev->cayman_ring_lock) + radeon_semaphore_emit_signal(rdev, ib->ring, rdev->cayman_ring_lock);
/* flush read cache over gart for this vmid */ radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); @@ -1190,6 +1254,25 @@ static int cayman_cp_load_microcode(struct radeon_device *rdev) return 0; }
+static int cayman_cp_start_compute(struct radeon_device *rdev, int ridx) +{ + struct radeon_ring *ring = &rdev->ring[ridx]; + int r; + + r = radeon_ring_lock(rdev, ring, 2); + if (r) { + DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r); + return r; + } + + /* clear the compute context state */ + radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0) | 2); + radeon_ring_write(ring, 0); + + radeon_ring_unlock_commit(rdev, ring); + return 0; +} + static int cayman_cp_start(struct radeon_device *rdev) { struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; @@ -1251,7 +1334,17 @@ static int cayman_cp_start(struct radeon_device *rdev)
radeon_ring_unlock_commit(rdev, ring);
- /* XXX init other rings */ + r = cayman_cp_start_compute(rdev, RADEON_RING_TYPE_GFX_INDEX); + if (r) + return r; + + r = cayman_cp_start_compute(rdev, CAYMAN_RING_TYPE_CP1_INDEX); + if (r) + return r; + + r = cayman_cp_start_compute(rdev, CAYMAN_RING_TYPE_CP2_INDEX); + if (r) + return r;
return 0; } @@ -1377,14 +1470,24 @@ int cayman_cp_resume(struct radeon_device *rdev)
/* start the rings */ cayman_cp_start(rdev); + rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true; - rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false; - rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false; - /* this only test cp0 */ r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]); if (r) { rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false; + return r; + } + + rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true; + r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]); + if (r) { rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false; + return r; + } + + rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true; + r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]); + if (r) { rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false; return r; } @@ -1468,7 +1571,7 @@ int cayman_asic_reset(struct radeon_device *rdev)
static int cayman_startup(struct radeon_device *rdev) { - struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; + struct radeon_ring *ring; int r;
/* enable pcie gen2 link */ @@ -1556,11 +1659,27 @@ static int cayman_startup(struct radeon_device *rdev) } evergreen_irq_set(rdev);
+ ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET, CP_RB0_RPTR, CP_RB0_WPTR, 0, 0xfffff, RADEON_CP_PACKET2); if (r) return r; + + ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]; + r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET, + CP_RB1_RPTR, CP_RB1_WPTR, + 0, 0xfffff, RADEON_CP_PACKET2); + if (r) + return r; + + ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]; + r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET, + CP_RB2_RPTR, CP_RB2_WPTR, + 0, 0xfffff, RADEON_CP_PACKET2); + if (r) + return r; + r = cayman_cp_load_microcode(rdev); if (r) return r; @@ -1607,6 +1726,7 @@ int cayman_resume(struct radeon_device *rdev) int cayman_suspend(struct radeon_device *rdev) { /* FIXME: we should wait for ring to be empty */ + radeon_semaphore_free(rdev, &rdev->cayman_ring_lock, NULL); radeon_ib_pool_suspend(rdev); radeon_vm_manager_suspend(rdev); r600_blit_suspend(rdev); @@ -1626,7 +1746,7 @@ int cayman_suspend(struct radeon_device *rdev) */ int cayman_init(struct radeon_device *rdev) { - struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; + struct radeon_ring *ring; int r;
/* Read BIOS */ @@ -1675,9 +1795,18 @@ int cayman_init(struct radeon_device *rdev) if (r) return r;
+ ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; ring->ring_obj = NULL; r600_ring_init(rdev, ring, 1024 * 1024);
+ ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]; + ring->ring_obj = NULL; + r600_ring_init(rdev, ring, 16 * 1024); + + ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]; + ring->ring_obj = NULL; + r600_ring_init(rdev, ring, 16 * 1024); + rdev->ih.ring_obj = NULL; r600_ih_ring_init(rdev, 64 * 1024);
@@ -1734,6 +1863,7 @@ void cayman_fini(struct radeon_device *rdev) if (rdev->flags & RADEON_IS_IGP) si_rlc_fini(rdev); radeon_wb_fini(rdev); + radeon_semaphore_free(rdev, &rdev->cayman_ring_lock, NULL); radeon_vm_manager_fini(rdev); r100_ib_fini(rdev); radeon_irq_kms_fini(rdev); diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index b24877f..dde3c13 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -1522,6 +1522,8 @@ struct radeon_device { /* virtual memory */ struct mutex vm_mutex; struct radeon_vm_manager vm_manager; + /* workaround for defect in caymans compute rings */ + struct radeon_semaphore *cayman_ring_lock; };
int radeon_device_init(struct radeon_device *rdev, diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index d1ead9c..54f3ec3 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -103,7 +103,7 @@ static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority p->ring = RADEON_RING_TYPE_GFX_INDEX; break; case RADEON_CS_RING_COMPUTE: - if (p->rdev->family >= CHIP_TAHITI) { + if (p->rdev->family >= CHIP_CAYMAN) { if (p->priority > 0) p->ring = CAYMAN_RING_TYPE_CP1_INDEX; else